[mlpack-git] master: Add option to set the input parameter when calculating the gradient. (fb3994c)

gitdub at mlpack.org gitdub at mlpack.org
Sun Apr 10 09:49:35 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/592fdcda156aa58aba3209017b585e7cfd12c345...fb3994c48e59d47b51931d9e7fbfa4777f181ca3

>---------------------------------------------------------------

commit fb3994c48e59d47b51931d9e7fbfa4777f181ca3
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Sun Apr 10 15:49:35 2016 +0200

    Add option to set the input parameter when calculating the gradient.


>---------------------------------------------------------------

fb3994c48e59d47b51931d9e7fbfa4777f181ca3
 src/mlpack/methods/ann/cnn.hpp                     |  6 ++---
 src/mlpack/methods/ann/ffn.hpp                     |  6 ++---
 src/mlpack/methods/ann/layer/bias_layer.hpp        | 29 ++++++----------------
 src/mlpack/methods/ann/layer/conv_layer.hpp        |  9 ++++---
 src/mlpack/methods/ann/layer/dropconnect_layer.hpp | 11 +++++---
 src/mlpack/methods/ann/layer/empty_layer.hpp       |  6 +++--
 src/mlpack/methods/ann/layer/linear_layer.hpp      | 29 ++++++++++++----------
 src/mlpack/methods/ann/layer/lstm_layer.hpp        | 18 ++++++++++++--
 src/mlpack/methods/ann/layer/recurrent_layer.hpp   |  7 ++++--
 src/mlpack/methods/ann/layer/sparse_bias_layer.hpp |  7 ++++--
 .../methods/ann/layer/sparse_input_layer.hpp       | 12 +++++----
 .../methods/ann/layer/sparse_output_layer.hpp      | 10 ++++----
 src/mlpack/methods/ann/rnn.hpp                     | 16 ++++++------
 13 files changed, 93 insertions(+), 73 deletions(-)
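
For context, the commit gives every layer's gradient-computing Gradient() member an explicit first argument carrying the propagated input, instead of having each layer read its stored inputParameter internally. Below is a minimal sketch of a layer that follows the new three-argument convention; the class name and member layout are illustrative only and are not part of the commit.

    #include <armadillo>

    // Hypothetical layer following the new convention.
    class CustomLayer
    {
     public:
      template<typename InputType, typename ErrorType, typename GradientType>
      void Gradient(const InputType& input,
                    const ErrorType& error,
                    GradientType& gradient)
      {
        // Outer product of the backpropagated error and the propagated
        // input, as in the LinearLayer variant further down.
        gradient = error * input.t();
      }

      // Accessors the network classes use to assemble the call.
      arma::mat& InputParameter() { return inputParameter; }
      arma::mat& Gradient() { return gradient; }

     private:
      arma::mat inputParameter;
      arma::mat gradient;
    };

    // The network side then calls, exactly as in the Update() overloads
    // shown in the diff below:
    //   layer.Gradient(layer.InputParameter(), delta, layer.Gradient());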

diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp
index e1872e4..56d6be2 100644
--- a/src/mlpack/methods/ann/cnn.hpp
+++ b/src/mlpack/methods/ann/cnn.hpp
@@ -378,15 +378,15 @@ class CNN
 
   template<typename T, typename P, typename D>
   typename std::enable_if<
-      HasGradientCheck<T, void(T::*)(const D&, P&)>::value, void>::type
+      HasGradientCheck<T, P&(T::*)()>::value, void>::type
   Update(T& layer, P& /* unused */, D& delta)
   {
-    layer.Gradient(delta, layer.Gradient());
+    layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
   }
 
   template<typename T, typename P, typename D>
   typename std::enable_if<
-      !HasGradientCheck<T, void(T::*)(const P&, D&)>::value, void>::type
+      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
   Update(T& /* unused */, P& /* unused */, D& /* unused */)
   {
     /* Nothing to do here */
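
Note that the enable_if condition changes as well: because the gradient-computing overload now takes three arguments, the dispatch keys off the parameter-less Gradient() accessor (signature P&(T::*)()), which only gradient-bearing layers expose. The following is a rough sketch of the sizeof-based member-detection idiom such a check typically relies on; it illustrates the technique and is not the exact HAS_MEM_FUNC macro mlpack uses.

    template<typename T, typename Signature>
    struct HasGradientSketch
    {
      template<typename U, Signature> struct Check;

      // Preferred overload exists only if &U::Gradient binds to Signature.
      template<typename U> static char Test(Check<U, &U::Gradient>*);
      template<typename U> static int Test(...);

      static const bool value = (sizeof(Test<T>(0)) == sizeof(char));
    };

    // Usage mirroring the enable_if conditions above:
    //   HasGradientSketch<SomeLayer, arma::mat&(SomeLayer::*)()>::value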
diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp
index 2bed68a..43972c7 100644
--- a/src/mlpack/methods/ann/ffn.hpp
+++ b/src/mlpack/methods/ann/ffn.hpp
@@ -380,15 +380,15 @@ private:
 
   template<typename T, typename P, typename D>
   typename std::enable_if<
-      HasGradientCheck<T, void(T::*)(const D&, P&)>::value, void>::type
+      HasGradientCheck<T, P&(T::*)()>::value, void>::type
   Update(T& layer, P& /* unused */, D& delta)
   {
-    layer.Gradient(delta, layer.Gradient());
+    layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
   }
 
   template<typename T, typename P, typename D>
   typename std::enable_if<
-      !HasGradientCheck<T, void(T::*)(const P&, D&)>::value, void>::type
+      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
   Update(T& /* unused */, P& /* unused */, D& /* unused */)
   {
     /* Nothing to do here */
diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp
index 343d281..83edd48 100644
--- a/src/mlpack/methods/ann/layer/bias_layer.hpp
+++ b/src/mlpack/methods/ann/layer/bias_layer.hpp
@@ -97,29 +97,16 @@ class BiasLayer
   /*
    * Calculate the gradient using the output delta and the bias.
    *
-   * @param d The calculated error.
-   * @param g The calculated gradient.
+   * @param input The propagated input.
+   * @param error The calculated error.
+   * @param gradient The calculated gradient.
    */
-  template<typename eT>
-  void Gradient(const arma::Cube<eT>& d, InputDataType& g)
-  {
-    g = arma::Mat<eT>(weights.n_rows, weights.n_cols);
-    for (size_t s = 0; s < d.n_slices; s++)
-    {
-      g(s) = arma::accu(d.slice(s)) * bias;
-    }
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the bias.
-   *
-   * @param d The calculated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Gradient(const arma::Mat<eT>& d, InputDataType& g)
+  template<typename eT, typename ErrorType, typename GradientType>
+  void Gradient(const arma::Mat<eT>& /* input */,
+                const ErrorType& error,
+                GradientType& gradient)
   {
-    g = d * bias;
+    gradient = error * bias;
   }
 
   //! Get the weights.
diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp
index aeb6c07..4a3cc79 100644
--- a/src/mlpack/methods/ann/layer/conv_layer.hpp
+++ b/src/mlpack/methods/ann/layer/conv_layer.hpp
@@ -134,11 +134,14 @@ class ConvLayer
   /*
    * Calculate the gradient using the output delta and the input activation.
    *
+   * @param input The input parameter used for calculating the gradient.
    * @param d The calculated error.
    * @param g The calculated gradient.
    */
-  template<typename eT>
-  void Gradient(const arma::Cube<eT>& d, arma::Cube<eT>& g)
+  template<typename InputType, typename eT>
+  void Gradient(const InputType& input,
+                const arma::Cube<eT>& d,
+                arma::Cube<eT>& g)
   {
     g = arma::zeros<arma::Cube<eT> >(weights.n_rows, weights.n_cols,
         weights.n_slices);
@@ -147,7 +150,7 @@ class ConvLayer
     {
       for (size_t inMap = 0, s = outMap; inMap < inMaps; inMap++, s += outMaps)
       {
-        arma::Cube<eT> inputSlices = inputParameter.slices(inMap, inMap);
+        arma::Cube<eT> inputSlices = input.slices(inMap, inMap);
         arma::Cube<eT> deltaSlices = d.slices(outMap, outMap);
 
         arma::Cube<eT> output;
diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
index bd8fe6e..826a7a8 100644
--- a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
+++ b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
@@ -175,22 +175,25 @@ class DropConnectLayer
   /**
    * Calculate the gradient using the output delta and the input activation.
    *
+   * @param input The propagated input.
    * @param d The calculated error.
    * @param g The calculated gradient.
    */
-  template<typename eT, typename GradientDataType>
-  void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
+  template<typename InputType, typename eT, typename GradientDataType>
+  void Gradient(const InputType& input,
+                const arma::Mat<eT>& d,
+                GradientDataType& g)
   {
     if(uselayer)
     {
-      baseLayer.Gradient(d, g);
+      baseLayer.Gradient(input, d, g);
 
       // Denoise the weights.
       baseLayer.Weights() = denoise;
     }
     else
     {
-      g = d * inputParameter.t();
+      g = d * input.t();
 
       // Denoise the weights.
       weights = denoise;
diff --git a/src/mlpack/methods/ann/layer/empty_layer.hpp b/src/mlpack/methods/ann/layer/empty_layer.hpp
index 7eb58ec..0ff6d93 100644
--- a/src/mlpack/methods/ann/layer/empty_layer.hpp
+++ b/src/mlpack/methods/ann/layer/empty_layer.hpp
@@ -68,8 +68,10 @@ class EmptyLayer
    * @param d The calculated error.
    * @param g The calculated gradient.
    */
-  template<typename ErrorType, typename GradientType>
-  void Gradient(const ErrorType& /* d */, GradientType& /* g */)
+  template<typename InputType, typename ErrorType, typename GradientType>
+  void Gradient(const InputType& /* input */,
+                const ErrorType& /* error */,
+                GradientType& /* gradient */)
   {
     /* Nothing to do here. */
   }
diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp
index b34e978..1108e94 100644
--- a/src/mlpack/methods/ann/layer/linear_layer.hpp
+++ b/src/mlpack/methods/ann/layer/linear_layer.hpp
@@ -73,7 +73,8 @@ class LinearLayer
       for (size_t i = 0; i < data.n_cols; i++, c++)
       {
         data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) *
-            input.n_rows * input.n_cols - 1) = arma::vectorise(input.slice(c));
+            input.n_rows * input.n_cols - 1) = arma::trans(arma::vectorise(
+            input.slice(c), 1));
       }
     }
 
@@ -97,17 +98,19 @@ class LinearLayer
     g = weights.t() * gy;
   }
   
-
   /*
    * Calculate the gradient using the output delta and the input activation.
    *
-   * @param d The calculated error.
-   * @param g The calculated gradient.
+   * @param input The propagated input.
+   * @param error The calculated error.
+   * @param gradient The calculated gradient.
    */
-  template<typename eT, typename GradientDataType>
-  void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
+  template<typename InputType, typename ErrorType, typename GradientType>
+  void Gradient(const InputType& input,
+                const ErrorType& error,
+                GradientType& gradient)
   {
-    GradientDelta(inputParameter, d, g);
+    GradientDelta(input, error, gradient);
   }
 
   //! Get the weights.
@@ -145,7 +148,7 @@ class LinearLayer
   }
 
  private:
-   /*
+  /*
    * Calculate the gradient using the output delta (3rd order tensor) and the
    * input activation (3rd order tensor).
    *
@@ -170,7 +173,7 @@ class LinearLayer
         data.row(i).subvec(s * input.n_rows *
             input.n_cols, (s + 1) *
             input.n_rows *
-            input.n_cols - 1) = arma::vectorise(
+        input.n_cols - 1) = arma::vectorise(
                 input.slice(c), 1);
       }
     }
@@ -187,12 +190,12 @@ class LinearLayer
    * @param g The calculated gradient.
    */
   template<typename eT>
-  void GradientDelta(const arma::Mat<eT>& /* input unused */,
+  void GradientDelta(const arma::Mat<eT>& input,
                      const arma::Mat<eT>& d,
                      arma::Cube<eT>& g)
   {
     g = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
-    Gradient(d, g.slice(0));
+    Gradient(input, d, g.slice(0));
   }
 
   /*
@@ -204,12 +207,12 @@ class LinearLayer
    * @param g The calculated gradient.
    */
   template<typename eT>
-  void GradientDelta(const arma::Cube<eT>& /* input unused */,
+  void GradientDelta(const arma::Cube<eT>& input,
                      const arma::Mat<eT>& d,
                      arma::Mat<eT>& g)
   {
     arma::Cube<eT> grad = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
-    Gradient(d, grad);
+    Gradient(input, d, grad);
     g = grad.slice(0);
   }
 
diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp
index 59c103c..86e4174 100644
--- a/src/mlpack/methods/ann/layer/lstm_layer.hpp
+++ b/src/mlpack/methods/ann/layer/lstm_layer.hpp
@@ -59,6 +59,10 @@ class LSTMLayer
       peepholeWeights.set_size(outSize, 3);
       peepholeDerivatives = arma::zeros<OutputDataType>(outSize, 3);
     }
+    else
+    {
+      peepholeWeights.set_size(0, 0);
+    }
   }  
 
   /**
@@ -90,6 +94,7 @@ class LSTMLayer
     // Split up the inputactivation into the 3 parts (inGate, forgetGate,
     // outGate).
     inGate.col(offset) = input.submat(0, 0, outSize - 1, 0);
+
     forgetGate.col(offset) = input.submat(outSize, 0, (outSize * 2) - 1, 0);
     outGate.col(offset) = input.submat(outSize * 3, 0, (outSize * 4) - 1, 0);
 
@@ -226,8 +231,17 @@ class LSTMLayer
     offset = (offset + 1) % seqLen;
   }
 
-  template<typename eT, typename GradientDataType>
-  void Gradient(const arma::Mat<eT>& /* unused */, GradientDataType& /* unused */)
+  /**
+   * Calculate the gradient using the output delta and the input activation.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT, typename GradientDataType>
+  void Gradient(const InputType& /* input */,
+                const arma::Mat<eT>& /* gy */,
+                GradientDataType& /* g */)
   {
     if (peepholes && offset == 0)
     {
diff --git a/src/mlpack/methods/ann/layer/recurrent_layer.hpp b/src/mlpack/methods/ann/layer/recurrent_layer.hpp
index 639852e..616794c 100644
--- a/src/mlpack/methods/ann/layer/recurrent_layer.hpp
+++ b/src/mlpack/methods/ann/layer/recurrent_layer.hpp
@@ -90,11 +90,14 @@ class RecurrentLayer
   /*
    * Calculate the gradient using the output delta and the input activation.
    *
+   * @param input The propagated input activation.
    * @param d The calculated error.
    * @param g The calculated gradient.
    */
-  template<typename eT, typename GradientDataType>
-  void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
+  template<typename InputType, typename eT, typename GradientDataType>
+  void Gradient(const InputType& /* input */,
+                const arma::Mat<eT>& d,
+                GradientDataType& g)
   {
     g = d * recurrentParameter.t();
   }
diff --git a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
index 9b79536..f25f9f0 100644
--- a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
+++ b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
@@ -77,11 +77,14 @@ class SparseBiasLayer
   /*
    * Calculate the gradient using the output delta and the bias.
    *
+   * @param input The propagated input.
    * @param d The calculated error.
    * @param g The calculated gradient.
    */
-  template<typename eT>
-  void Gradient(const arma::Mat<eT>& d, InputDataType& g)
+  template<typename InputType, typename eT>
+  void Gradient(const InputType& /* input */,
+                const arma::Mat<eT>& d,
+                InputDataType& g)
   {    
     g = arma::sum(d, 1) / static_cast<typename InputDataType::value_type>(
         batchSize);
diff --git a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
index ce5ce7d..2858695 100644
--- a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
+++ b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
@@ -82,15 +82,17 @@ class SparseInputLayer
   /*
    * Calculate the gradient using the output delta and the input activation.
    *
+   * @param input The propagated input.
    * @param d The calculated error.
    * @param g The calculated gradient.
    */
-  template<typename eT, typename GradientDataType>
-  void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
+  template<typename InputType, typename eT, typename GradientDataType>
+  void Gradient(const InputType& input,
+                const arma::Mat<eT>& d,
+                GradientDataType& g)
   {
-    g = d * inputParameter.t() /
-        static_cast<typename InputDataType::value_type>(inputParameter.n_cols) +
-        lambda * weights;
+    g = d * input.t() / static_cast<typename InputType::value_type>(
+        input.n_cols) + lambda * weights;
   }
 
   //! Get the weights.
diff --git a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
index 3022e2a..6245825 100644
--- a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
+++ b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
@@ -91,15 +91,15 @@ class SparseOutputLayer
   /*
    * Calculate the gradient using the output delta and the input activation.
    *
+   * @param input The propagated input.
    * @param d The calculated error.
    * @param g The calculated gradient.
    */
-  template<typename eT>
-  void Gradient(const arma::Mat<eT>& d, arma::Mat<eT>& g)
+  template<typename InputType, typename eT>
+  void Gradient(const InputType& input, const arma::Mat<eT>& d, arma::Mat<eT>& g)
   {        
-    g = d * inputParameter.t() /
-        static_cast<typename InputDataType::value_type>(inputParameter.n_cols) +
-        lambda * weights;    
+    g = d * input.t() / static_cast<typename InputType::value_type>(
+        input.n_cols) + lambda * weights;
   }
   
   //! Sets the KL divergence parameter.
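
For the two sparse layers the gradient formula itself is unchanged; only the source of the activation differs, now taken from the input argument rather than the stored inputParameter. A small shape sketch of the expression follows; the sizes and names are illustrative only.

    #include <armadillo>

    int main()
    {
      arma::mat d(5, 32, arma::fill::randu);       // error: outSize x batchSize
      arma::mat input(10, 32, arma::fill::randu);  // input: inSize x batchSize
      arma::mat weights(5, 10, arma::fill::randu); // outSize x inSize
      const double lambda = 0.0001;

      // d * input.t() averages the outer products over the batch and has the
      // same shape as weights, so the regularization term adds elementwise.
      arma::mat g = d * input.t() / static_cast<double>(input.n_cols) +
          lambda * weights;                        // 5 x 10

      return 0;
    }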
diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp
index 1389a87..be2bdd5 100644
--- a/src/mlpack/methods/ann/rnn.hpp
+++ b/src/mlpack/methods/ann/rnn.hpp
@@ -356,7 +356,7 @@ class RNN
    */
   template<typename T, typename P, typename D>
   typename std::enable_if<
-      HasGradientCheck<T, void(T::*)(const D&, P&)>::value, void>::type
+      HasGradientCheck<T, P&(T::*)()>::value, void>::type
   Init(T& layer, P& /* unused */, D& /* unused */)
   {
     // Initialize the input size only once.
@@ -368,7 +368,7 @@ class RNN
 
   template<typename T, typename P, typename D>
   typename std::enable_if<
-      !HasGradientCheck<T, void(T::*)(const P&, D&)>::value, void>::type
+      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
   Init(T& /* unused */, P& /* unused */, D& /* unused */)
   {
     /* Nothing to do here */
@@ -696,20 +696,20 @@ class RNN
   template<typename T1, typename P1, typename D1, typename T2, typename P2,
       typename D2>
   typename std::enable_if<
-      HasGradientCheck<T1, void(T1::*)(const D1&, P1&)>::value &&
+      HasGradientCheck<T1, P1&(T1::*)()>::value &&
       HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value, void>::type
   Update(T1& layer, P1& /* unused */, D1& /* unused */, T2& /* unused */,
          P2& /* unused */, D2& delta2)
   {
-    layer.Gradient(delta2, layer.Gradient());
+    layer.Gradient(layer.InputParameter(), delta2, layer.Gradient());
   }
 
   template<typename T1, typename P1, typename D1, typename T2, typename P2,
       typename D2>
   typename std::enable_if<
-      (!HasGradientCheck<T1, void(T1::*)(const D1&, P1&)>::value &&
+      (!HasGradientCheck<T1, P1&(T1::*)()>::value &&
       !HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value) ||
-      (!HasGradientCheck<T1, void(T1::*)(const D1&, P1&)>::value &&
+      (!HasGradientCheck<T1, P1&(T1::*)()>::value &&
       HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value), void>::type
   Update(T1& /* unused */, P1& /* unused */, D1& /* unused */, T2& /* unused */,
          P2& /* unused */, D2& /* unused */)
@@ -720,12 +720,12 @@ class RNN
   template<typename T1, typename P1, typename D1, typename T2, typename P2,
       typename D2>
   typename std::enable_if<
-      HasGradientCheck<T1, void(T1::*)(const D1&, P1&)>::value &&
+      HasGradientCheck<T1, P1&(T1::*)()>::value &&
       !HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value, void>::type
   Update(T1& layer, P1& /* unused */, D1& delta1, T2& /* unused */,
          P2& /* unused */, D2& /* unused */)
   {
-    layer.Gradient(delta1, layer.Gradient());
+    layer.Gradient(layer.InputParameter(), delta1, layer.Gradient());
   }
 
   /*




More information about the mlpack-git mailing list