[mlpack-git] master: Refactor feedforward network test for new network API. (967adc4)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Sat Aug 29 09:03:18 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/3413a77502387c942a12ac47a7e2cc966ed2ddcd...967adc4746d7e638422cdf1c373d2865ff4e8d4c

>---------------------------------------------------------------

commit 967adc4746d7e638422cdf1c373d2865ff4e8d4c
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Sat Aug 29 15:01:34 2015 +0200

    Refactor feedforward network test for new network API.


>---------------------------------------------------------------

967adc4746d7e638422cdf1c373d2865ff4e8d4c
 src/mlpack/methods/ann/ffn.hpp                 |   9 +-
 src/mlpack/methods/ann/optimizer/rmsprop.hpp   |  10 +-
 src/mlpack/methods/ann/trainer/trainer.hpp     |  15 +-
 src/mlpack/tests/activation_functions_test.cpp | 333 +++++++++++--------------
 4 files changed, 160 insertions(+), 207 deletions(-)
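
Before the per-file hunks, here is a minimal sketch of how the refactored network
API fits together, distilled from the test changes below. This sketch is not part
of the commit: the single hidden layer, the LogisticFunction activation, and the
helper name NewApiSketch() are illustrative assumptions, while the layer
constructors, the std::tie module grouping, and the FeedForward() /
FeedBackward() / ApplyGradients() / Predict() calls are taken from the diff
itself.

#include <tuple>

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/init_rules/random_init.hpp>
#include <mlpack/methods/ann/optimizer/rmsprop.hpp>
#include <mlpack/methods/ann/performance_functions/mse_function.hpp>
#include <mlpack/methods/ann/layer/linear_layer.hpp>
#include <mlpack/methods/ann/layer/bias_layer.hpp>
#include <mlpack/methods/ann/layer/base_layer.hpp>
#include <mlpack/methods/ann/layer/binary_classification_layer.hpp>
#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>

using namespace mlpack::ann;

// Sketch only; mirrors the refactored test below, trimmed to one hidden layer.
void NewApiSketch()
{
  arma::mat input = arma::randu<arma::mat>(10, 1); // One 10-dimensional sample.
  arma::mat target("0; 1");                        // Two output units.
  arma::mat error;

  RandomInitialization randInit(-0.5, 0.5);

  // Layers are now free-standing objects; the optimizer and the weight
  // initialization rule are template parameters of the layer itself instead
  // of separate connection objects.
  LinearLayer<RMSPROP, RandomInitialization> linear0(input.n_rows, 4, randInit);
  BiasLayer<> bias0(4);
  BaseLayer<LogisticFunction> act0;

  LinearLayer<RMSPROP, RandomInitialization> linear1(4, target.n_rows, randInit);
  BiasLayer<> bias1(target.n_rows);
  BaseLayer<LogisticFunction> act1;

  BinaryClassificationLayer outputLayer;

  // Modules are grouped with std::tie and handed to the FFN class together
  // with the output layer and a performance function.
  auto modules = std::tie(linear0, bias0, act0, linear1, bias1, act1);
  FFN<decltype(modules), decltype(outputLayer), MeanSquaredErrorFunction>
      net(modules, outputLayer);

  // FeedBackward() now also takes the input (see the ffn.hpp and trainer.hpp
  // hunks below); gradients are accumulated and applied explicitly.
  net.FeedForward(input, target, error);
  net.FeedBackward(input, error);
  net.ApplyGradients();

  arma::mat prediction;
  net.Predict(input, prediction);
}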

diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp
index 4eab5fe..05d430d 100644
--- a/src/mlpack/methods/ann/ffn.hpp
+++ b/src/mlpack/methods/ann/ffn.hpp
@@ -52,7 +52,6 @@ class FFN
      * @param input Input data used to evaluate the network.
      * @param target Target data used to calculate the network error.
      * @param error The calculated error of the output layer.
-     * @tparam DataType Type of data (arma::colvec, arma::mat or arma::sp_mat).
      */
     template <typename InputType, typename TargetType, typename ErrorType>
     void FeedForward(const InputType& input,
@@ -68,7 +67,6 @@ class FFN
      * error of the output layer.
      *
      * @param error The calculated error of the output layer.
-     * @tparam DataType Type of data (arma::colvec, arma::mat or arma::sp_mat).
      */
     template <typename InputType, typename ErrorType>
     void FeedBackward(const InputType& /* unused */, const ErrorType& error)
@@ -94,7 +92,6 @@ class FFN
      *
      * @param input Input data used to evaluate the network.
      * @param output Output data used to store the output activation
-     * @tparam DataType Type of data (arma::colvec, arma::mat or arma::sp_mat).
      */
     template <typename DataType>
     void Predict(const DataType& input, DataType& output)
@@ -113,7 +110,6 @@ class FFN
      * @param input Input data used to evaluate the trained network.
      * @param target Target data used to calculate the network error.
      * @param error The calculated error of the output layer.
-     * @tparam VecType Type of data (arma::colvec, arma::mat or arma::sp_mat).
      */
     template <typename InputType, typename TargetType, typename ErrorType>
     double Evaluate(const InputType& input,
@@ -243,8 +239,8 @@ class FFN
                        const std::tuple<Tp...>& t)
     {
       // Calculate and store the output error.
-      outputLayer.CalculateError(std::get<sizeof...(Tp) - 1>(t).OutputParameter(),
-          target, error);
+      outputLayer.CalculateError(
+          std::get<sizeof...(Tp) - 1>(t).OutputParameter(), target, error);
 
       // Measures the network's performance with the specified performance
       // function.
@@ -394,7 +390,6 @@ class FFN
     bool deterministic;
 }; // class FFN
 
-
 //! Network traits for the FFN network.
 template <
   typename LayerTypes,
diff --git a/src/mlpack/methods/ann/optimizer/rmsprop.hpp b/src/mlpack/methods/ann/optimizer/rmsprop.hpp
index a29be5a..063236d 100644
--- a/src/mlpack/methods/ann/optimizer/rmsprop.hpp
+++ b/src/mlpack/methods/ann/optimizer/rmsprop.hpp
@@ -80,13 +80,12 @@ class RMSPROP
   {
     if (gradient.n_elem != 0)
     {
-      DataType outputGradient;
-      function.Gradient(outputGradient);
+      DataType outputGradient = function.Gradient();
       gradient += outputGradient;
     }
     else
     {
-      function.Gradient(gradient);
+      gradient = function.Gradient();
     }
   }
 
@@ -98,6 +97,11 @@ class RMSPROP
     gradient.zeros();
   }
 
+  //! Get the gradient.
+  const DataType& Gradient() const { return gradient; }
+  //! Modify the gradient.
+  DataType& Gradient() { return gradient; }
+
  private:
   /**
    * Optimize the given function using RmsProp.
diff --git a/src/mlpack/methods/ann/trainer/trainer.hpp b/src/mlpack/methods/ann/trainer/trainer.hpp
index 1f1aa4d..cbb6521 100644
--- a/src/mlpack/methods/ann/trainer/trainer.hpp
+++ b/src/mlpack/methods/ann/trainer/trainer.hpp
@@ -24,12 +24,10 @@ namespace ann /** Artificial Neural Network. */ {
  * @tparam NetworkType The type of network which should be trained and
  * evaluated.
  * @tparam MatType Type of the error (arma::mat or arma::sp_mat).
- * @tparam VecType Type of error type (arma::colvec, arma::mat or arma::sp_mat).
  */
 template<
   typename NetworkType,
-  typename MatType = arma::mat,
-  typename VecType = arma::colvec
+  typename MatType = arma::mat
 >
 class Trainer
 {
@@ -86,6 +84,8 @@ class Trainer
           ElementCount(trainingData) - 1, ElementCount(trainingData));
       epoch = 0;
 
+      size_t foo = 0;
+
       while(true)
       {
         if (shuffle)
@@ -147,7 +147,7 @@ class Trainer
             Element(target, index(i)), error);
 
         trainingError += net.Error();
-        net.FeedBackward(error);
+        net.FeedBackward(Element(data, index(i)), error);
 
         if (((i + 1) % batchSize) == 0)
           net.ApplyGradients();
@@ -189,10 +189,10 @@ class Trainer
      */
     template<typename eT>
     typename std::enable_if<!NetworkTraits<NetworkType>::IsCNN,
-        arma::Col<eT> >::type
+        arma::Mat<eT> >::type
     Element(arma::Mat<eT>& input, const size_t colNum)
     {
-      return arma::Col<eT>(input.colptr(colNum), input.n_rows, false, true);
+      return arma::Mat<eT>(input.colptr(colNum), input.n_rows, 1, false, true);
     }
 
     /*
@@ -248,8 +248,7 @@ class Trainer
     NetworkType& net;
 
     //! The current network error of a single input.
-    typename std::conditional<NetworkTraits<NetworkType>::IsFNN,
-        VecType, MatType>::type error;
+    MatType error;
 
     //! The current epoch if maxEpochs is set.
     size_t epoch;
diff --git a/src/mlpack/tests/activation_functions_test.cpp b/src/mlpack/tests/activation_functions_test.cpp
index c852684..f1d8557 100644
--- a/src/mlpack/tests/activation_functions_test.cpp
+++ b/src/mlpack/tests/activation_functions_test.cpp
@@ -12,14 +12,15 @@
 #include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
 #include <mlpack/methods/ann/activation_functions/rectifier_function.hpp>
 
-#include <mlpack/methods/ann/ffnn.hpp>
+#include <mlpack/methods/ann/ffn.hpp>
 #include <mlpack/methods/ann/init_rules/random_init.hpp>
-#include <mlpack/methods/ann/layer/neuron_layer.hpp>
+#include <mlpack/methods/ann/optimizer/rmsprop.hpp>
+#include <mlpack/methods/ann/performance_functions/mse_function.hpp>
+
 #include <mlpack/methods/ann/layer/bias_layer.hpp>
-#include <mlpack/methods/ann/layer/multiclass_classification_layer.hpp>
-#include <mlpack/methods/ann/connections/full_connection.hpp>
-#include <mlpack/methods/ann/connections/self_connection.hpp>
-#include <mlpack/methods/ann/optimizer/irpropp.hpp>
+#include <mlpack/methods/ann/layer/linear_layer.hpp>
+#include <mlpack/methods/ann/layer/base_layer.hpp>
+#include <mlpack/methods/ann/layer/binary_classification_layer.hpp>
 
 #include <boost/test/unit_test.hpp>
 #include "old_boost_test_definitions.hpp"
@@ -198,188 +199,142 @@ BOOST_AUTO_TEST_CASE(RectifierFunctionTest)
       desiredDerivatives);
 }
 
-// /*
-//  * Implementation of the numerical gradient checking.
-//  *
-//  * @param input Input data used for evaluating the network.
-//  * @param target Target data used to calculate the network error.
-//  * @param perturbation Constant perturbation value.
-//  * @param threshold Threshold used as bounding check.
-//  *
-//  * @tparam ActivationFunction Activation function used for the gradient check.
-//  */
-// template<class ActivationFunction>
-// void CheckGradientNumericallyCorrect(const arma::colvec input,
-//                                      const arma::colvec target,
-//                                      const double perturbation,
-//                                      const double threshold)
-// {
-//   // Specify the structure of the feed forward neural network.
-//   RandomInitialization randInit(-0.5, 0.5);
-//   arma::colvec error;
-
-//   NeuronLayer<ActivationFunction> inputLayer(input.n_elem);
-
-//   BiasLayer<> biasLayer0(1);
-//   BiasLayer<> biasLayer1(1);
-//   BiasLayer<> biasLayer2(1);
-
-//   NeuronLayer<ActivationFunction> hiddenLayer0(4);
-//   NeuronLayer<ActivationFunction> hiddenLayer1(2);
-//   NeuronLayer<ActivationFunction> hiddenLayer2(target.n_elem);
-
-//   iRPROPp< > conOptimizer0(input.n_elem, hiddenLayer0.InputSize());
-//   iRPROPp< > conOptimizer1(1, 4);
-//   iRPROPp< > conOptimizer2(4, 2);
-//   iRPROPp< > conOptimizer3(1, 2);
-//   iRPROPp< > conOptimizer4(2, target.n_elem);
-//   iRPROPp< > conOptimizer5(1, target.n_elem);
-
-//   ClassificationLayer outputLayer;
-
-//   FullConnection<
-//       decltype(inputLayer),
-//       decltype(hiddenLayer0),
-//       decltype(conOptimizer0),
-//       decltype(randInit)>
-//       layerCon0(inputLayer, hiddenLayer0, conOptimizer0, randInit);
-
-//   FullConnection<
-//     decltype(biasLayer0),
-//     decltype(hiddenLayer0),
-//     decltype(conOptimizer1),
-//     decltype(randInit)>
-//     layerCon1(biasLayer0, hiddenLayer0, conOptimizer1, randInit);
-
-//   FullConnection<
-//       decltype(hiddenLayer0),
-//       decltype(hiddenLayer1),
-//       decltype(conOptimizer2),
-//       decltype(randInit)>
-//       layerCon2(hiddenLayer0, hiddenLayer1, conOptimizer2, randInit);
-
-//   FullConnection<
-//     decltype(biasLayer1),
-//     decltype(hiddenLayer1),
-//     decltype(conOptimizer3),
-//     decltype(randInit)>
-//     layerCon3(biasLayer1, hiddenLayer1, conOptimizer3, randInit);
-
-//   FullConnection<
-//       decltype(hiddenLayer1),
-//       decltype(hiddenLayer2),
-//       decltype(conOptimizer4),
-//       decltype(randInit)>
-//       layerCon4(hiddenLayer1, hiddenLayer2, conOptimizer4, randInit);
-
-//   FullConnection<
-//     decltype(biasLayer2),
-//     decltype(hiddenLayer2),
-//     decltype(conOptimizer5),
-//     decltype(randInit)>
-//     layerCon5(biasLayer2, hiddenLayer2, conOptimizer5, randInit);
-
-//   auto module0 = std::tie(layerCon0, layerCon1);
-//   auto module1 = std::tie(layerCon2, layerCon3);
-//   auto module2 = std::tie(layerCon4, layerCon5);
-//   auto modules = std::tie(module0, module1, module2);
-
-//   FFNN<decltype(modules), decltype(outputLayer)> net(modules, outputLayer);
-
-//   // Initialize the feed forward neural network.
-//   net.FeedForward(input, target, error);
-//   net.FeedBackward(error);
-
-//   std::vector<std::reference_wrapper<
-//       FullConnection<
-//       decltype(inputLayer),
-//       decltype(hiddenLayer0),
-//       decltype(conOptimizer0),
-//       decltype(randInit)> > > layer {layerCon0, layerCon2, layerCon4};
-
-//   std::vector<arma::mat> gradient {
-//       hiddenLayer0.Delta() * inputLayer.InputActivation().t(),
-//       hiddenLayer1.Delta() * hiddenLayer0.InputActivation().t(),
-//       hiddenLayer2.Delta() * hiddenLayer1.InputActivation().t() };
-
-//   double weight, mLoss, pLoss, dW, e;
-
-//   for (size_t l = 0; l < layer.size(); ++l)
-//   {
-//     for (size_t i = 0; i < layer[l].get().Weights().n_rows; ++i)
-//     {
-//       for (size_t j = 0; j < layer[l].get().Weights().n_cols; ++j)
-//       {
-//         // Store original weight.
-//         weight = layer[l].get().Weights()(i, j);
-
-//         // Add negative perturbation and compute error.
-//         layer[l].get().Weights().at(i, j) -= perturbation;
-//         net.FeedForward(input, target, error);
-//         mLoss = arma::as_scalar(0.5 * arma::sum(arma::pow(error, 2)));
-
-//         // Add positive perturbation and compute error.
-//         layer[l].get().Weights().at(i, j) += (2 * perturbation);
-//         net.FeedForward(input, target, error);
-//         pLoss = arma::as_scalar(0.5 * arma::sum(arma::pow(error, 2)));
-
-//         // Compute symmetric difference.
-//         dW = (pLoss - mLoss) / (2 * perturbation);
-//         e = std::abs(dW - gradient[l].at(i, j));
-
-//         bool b = e < threshold;
-//         BOOST_REQUIRE_EQUAL(b, 1);
-
-//         // Restore original weight.
-//         layer[l].get().Weights().at(i, j) = weight;
-//       }
-//     }
-//   }
-// }
-
-// /**
-//  * The following test implements numerical gradient checking. It computes the
-//  * numerical gradient, a numerical approximation of the partial derivative of J
-//  * with respect to the i-th input argument, evaluated at g. The numerical
-//  * gradient should be approximately the partial derivative of J with respect to
-//  * g(i).
-//  *
-//  * Given a function g(\theta) that is supposedly computing:
-//  *
-//  * @f[
-//  * \frac{\partial}{\partial \theta} J(\theta)
-//  * @f]
-//  *
-//  * we can now numerically verify its correctness by checking:
-//  *
-//  * @f[
-//  * g(\theta) \approx \frac{J(\theta + eps) - J(\theta - eps)}{2 * eps}
-//  * @f]
-//  */
-// BOOST_AUTO_TEST_CASE(GradientNumericallyCorrect)
-// {
-//   // Initialize dataset.
-//   const arma::colvec input = arma::randu<arma::colvec>(10);
-//   const arma::colvec target("0 1;");
-
-//   // Perturbation and threshold constant.
-//   const double perturbation = 1e-6;
-//   const double threshold = 1e-7;
-
-//   CheckGradientNumericallyCorrect<LogisticFunction>(input, target,
-//       perturbation, threshold);
-
-//   CheckGradientNumericallyCorrect<IdentityFunction>(input, target,
-//       perturbation, threshold);
-
-//   CheckGradientNumericallyCorrect<RectifierFunction>(input, target,
-//       perturbation, threshold);
-
-//   CheckGradientNumericallyCorrect<SoftsignFunction>(input, target,
-//       perturbation, threshold);
-
-//   CheckGradientNumericallyCorrect<TanhFunction>(input, target,
-//       perturbation, threshold);
-// }
+/*
+ * Implementation of the numerical gradient checking.
+ *
+ * @param input Input data used for evaluating the network.
+ * @param target Target data used to calculate the network error.
+ * @param perturbation Constant perturbation value.
+ * @param threshold Threshold used as bounding check.
+ *
+ * @tparam ActivationFunction Activation function used for the gradient check.
+ */
+template<class ActivationFunction>
+void CheckGradientNumericallyCorrect(const arma::mat input,
+                                     const arma::mat target,
+                                     const double perturbation,
+                                     const double threshold)
+{
+  // Specify the structure of the feed forward neural network.
+  RandomInitialization randInit(-0.5, 0.5);
+  arma::mat error;
+
+  // Number of hidden layer units.
+  const size_t hiddenLayerSize = 4;
+
+  LinearLayer<mlpack::ann::RMSPROP, RandomInitialization> linearLayer0(
+        input.n_rows, hiddenLayerSize, randInit);
+  BiasLayer<> biasLayer0(hiddenLayerSize);
+  BaseLayer<ActivationFunction> baseLayer0;
+
+  LinearLayer<mlpack::ann::RMSPROP, RandomInitialization> linearLayer1(
+         hiddenLayerSize, hiddenLayerSize, randInit);
+  BiasLayer<> biasLayer1(hiddenLayerSize);
+  BaseLayer<ActivationFunction> baseLayer1;
+
+  LinearLayer<mlpack::ann::RMSPROP, RandomInitialization> linearLayer2(
+         hiddenLayerSize, target.n_rows, randInit);
+  BiasLayer<> biasLayer2(target.n_rows);
+  BaseLayer<ActivationFunction> baseLayer2;
+
+  BinaryClassificationLayer classOutputLayer;
+
+  auto modules = std::tie(linearLayer0, biasLayer0, baseLayer0,
+                          linearLayer1, biasLayer1, baseLayer1,
+                          linearLayer2, biasLayer2, baseLayer2);
+
+  FFN<decltype(modules), decltype(classOutputLayer), MeanSquaredErrorFunction>
+      net(modules, classOutputLayer);
+
+  // Initialize the feed forward neural network.
+  net.FeedForward(input, target, error);
+  net.FeedBackward(input, error);
+
+  std::vector<std::reference_wrapper<decltype(linearLayer0)> > layer {
+         linearLayer0, linearLayer1, linearLayer2 };
+
+  std::vector<arma::mat> gradient {linearLayer0.Gradient(),
+                                   linearLayer1.Gradient(),
+                                   linearLayer2.Gradient()};
+
+  double weight, mLoss, pLoss, dW, e;
+
+  for (size_t l = 0; l < layer.size(); ++l)
+  {
+    for (size_t i = 0; i < layer[l].get().Weights().n_rows; ++i)
+    {
+      for (size_t j = 0; j < layer[l].get().Weights().n_cols; ++j)
+      {
+        // Store original weight.
+        weight = layer[l].get().Weights()(i, j);
+
+        // Add negative perturbation and compute error.
+        layer[l].get().Weights().at(i, j) -= perturbation;
+        net.FeedForward(input, target, error);
+        mLoss = arma::as_scalar(0.5 * arma::sum(arma::pow(error, 2)));
+
+        // Add positive perturbation and compute error.
+        layer[l].get().Weights().at(i, j) += (2 * perturbation);
+        net.FeedForward(input, target, error);
+        pLoss = arma::as_scalar(0.5 * arma::sum(arma::pow(error, 2)));
+
+        // Compute symmetric difference.
+        dW = (pLoss - mLoss) / (2 * perturbation);
+        e = std::abs(dW - gradient[l].at(i, j));
+
+        bool b = e < threshold;
+        BOOST_REQUIRE_EQUAL(b, 1);
+
+        // Restore original weight.
+        layer[l].get().Weights().at(i, j) = weight;
+      }
+    }
+  }
+}
+
+/**
+ * The following test implements numerical gradient checking. It computes the
+ * numerical gradient, a numerical approximation of the partial derivative of J
+ * with respect to the i-th input argument, evaluated at g. The numerical
+ * gradient should be approximately the partial derivative of J with respect to
+ * g(i).
+ *
+ * Given a function g(\theta) that is supposedly computing:
+ *
+ * @f[
+ * \frac{\partial}{\partial \theta} J(\theta)
+ * @f]
+ *
+ * we can now numerically verify its correctness by checking:
+ *
+ * @f[
+ * g(\theta) \approx \frac{J(\theta + eps) - J(\theta - eps)}{2 * eps}
+ * @f]
+ */
+BOOST_AUTO_TEST_CASE(GradientNumericallyCorrect)
+{
+  // Initialize dataset.
+  const arma::colvec input = arma::randu<arma::colvec>(10);
+  const arma::colvec target("0 1;");
+
+  // Perturbation and threshold constant.
+  const double perturbation = 1e-6;
+  const double threshold = 1e-5;
+
+  CheckGradientNumericallyCorrect<LogisticFunction>(input, target,
+      perturbation, threshold);
+
+  CheckGradientNumericallyCorrect<IdentityFunction>(input, target,
+      perturbation, threshold);
+
+  CheckGradientNumericallyCorrect<RectifierFunction>(input, target,
+      perturbation, threshold);
+
+  CheckGradientNumericallyCorrect<SoftsignFunction>(input, target,
+      perturbation, threshold);
+
+  CheckGradientNumericallyCorrect<TanhFunction>(input, target,
+      perturbation, threshold);
+}
 
 BOOST_AUTO_TEST_SUITE_END();
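
For reference, the symmetric-difference check that the GradientNumericallyCorrect
test added above relies on can be illustrated in isolation. The following
standalone sketch is not part of the commit; the quadratic loss
J(w) = 0.5 * w^2 and the tolerance are arbitrary choices for the example:

#include <cassert>
#include <cmath>

int main()
{
  // The analytic derivative of J(w) = 0.5 * w^2 is g(w) = w; the central
  // difference (J(w + eps) - J(w - eps)) / (2 * eps) should agree with it
  // up to O(eps^2).
  const double w = 0.3;
  const double eps = 1e-6;

  auto J = [](const double x) { return 0.5 * x * x; };

  const double numerical = (J(w + eps) - J(w - eps)) / (2 * eps);
  const double analytic = w;

  assert(std::abs(numerical - analytic) < 1e-7);
  return 0;
}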


