[mlpack-git] master: Refactor to handle 3rd order tensors correctly. (1b88e29)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Sat Aug 29 08:23:37 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ea45ace1ff744390a4c35183528eda881eda5c61...fd336238de224ed72fc23b84e1e2f02ae3c879d6
>---------------------------------------------------------------
commit 1b88e2901406fc17b2bc09abc8ba76b8576f468a
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Wed Aug 26 18:18:18 2015 +0200
Refactor to handle 3rd order tensors correctly.
>---------------------------------------------------------------
1b88e2901406fc17b2bc09abc8ba76b8576f468a
src/mlpack/methods/ann/layer/linear_layer.hpp | 199 ++++++++++++++++++++++----
1 file changed, 173 insertions(+), 26 deletions(-)
diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp
index 7b48ad3..55c313e 100644
--- a/src/mlpack/methods/ann/layer/linear_layer.hpp
+++ b/src/mlpack/methods/ann/layer/linear_layer.hpp
@@ -22,13 +22,16 @@ namespace ann /** Artificial Neural Network. */ {
*
* @tparam OptimizerType Type of the optimizer used to update the weights.
* @tparam WeightInitRule Rule used to initialize the weight matrix.
- * @tparam DataType Type of data (arma::colvec, arma::mat arma::sp_mat or
- * arma::cube).
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ * arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ * arma::sp_mat or arma::cube).
*/
template <
template<typename, typename> class OptimizerType = mlpack::ann::RMSPROP,
class WeightInitRule = NguyenWidrowInitialization,
- typename DataType = arma::mat
+ typename InputDataType = arma::mat,
+ typename OutputDataType = arma::mat
>
class LinearLayer
{
@@ -48,7 +51,9 @@ class LinearLayer
outSize(outSize),
optimizer(new OptimizerType<LinearLayer<OptimizerType,
WeightInitRule,
- DataType>, DataType>(*this)),
+ InputDataType,
+ OutputDataType>,
+ OutputDataType>(*this)),
ownsOptimizer(true)
{
weightInitRule.Initialize(weights, outSize, inSize);
@@ -77,6 +82,30 @@ class LinearLayer
}
/**
+ * Ordinary feed forward pass of a neural network, evaluating the function
+ * f(x) by propagating the activity forward through f.
+ *
+ * @param input Input data used for evaluating the specified function.
+ * @param output Resulting output activation.
+ */
+ template<typename eT>
+ void Forward(const arma::Cube<eT>& input, arma::Mat<eT>& output)
+ {
+ arma::Mat<eT> data(input.n_elem, 1);
+
+ for (size_t s = 0, c = 0; s < input.n_slices / data.n_cols; s++)
+ {
+ for (size_t i = 0; i < data.n_cols; i++, c++)
+ {
+ data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) *
+ input.n_rows * input.n_cols - 1) = arma::vectorise(input.slice(c));
+ }
+ }
+
+ output = weights * data;
+ }
+
+ /**
* Ordinary feed backward pass of a neural network, calculating the function
 * f(x) by propagating x backwards through f. Using the results from the feed
* forward pass.
@@ -85,8 +114,8 @@ class LinearLayer
* @param gy The backpropagated error.
* @param g The calculated gradient.
*/
- template<typename eT>
- void Backward(const arma::Mat<eT>& /* unused */,
+ template<typename InputType, typename eT>
+ void Backward(const InputType& /* unused */,
const arma::Mat<eT>& gy,
arma::Mat<eT>& g)
{
@@ -96,45 +125,142 @@ class LinearLayer
/*
* Calculate the gradient using the output delta and the input activation.
*
+ * @param d The calculated error.
* @param g The calculated gradient.
*/
- template<typename eT>
- void Gradient(arma::Mat<eT>& g)
+ template<typename eT, typename GradientDataType>
+ void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
{
- g = delta * parameter.t();
+ GradientDelta(inputParameter, d, g);
}
//! Get the optimizer.
OptimizerType<LinearLayer<OptimizerType,
WeightInitRule,
- DataType>, DataType>& Optimizer() const
+ InputDataType,
+ OutputDataType>, OutputDataType>& Optimizer() const
{
return *optimizer;
}
//! Modify the optimizer.
OptimizerType<LinearLayer<OptimizerType,
WeightInitRule,
- DataType>, DataType>& Optimizer()
+ InputDataType,
+ OutputDataType>, OutputDataType>& Optimizer()
{
return *optimizer;
}
//! Get the weights.
- DataType& Weights() const { return weights; }
+ OutputDataType& Weights() const { return weights; }
//! Modify the weights.
- DataType& Weights() { return weights; }
+ OutputDataType& Weights() { return weights; }
- //! Get the parameter.
- DataType& Parameter() const {return parameter; }
- //! Modify the parameter.
- DataType& Parameter() { return parameter; }
+ //! Get the input parameter.
+ InputDataType& InputParameter() const {return inputParameter; }
+ //! Modify the input parameter.
+ InputDataType& InputParameter() { return inputParameter; }
+
+ //! Get the output parameter.
+ OutputDataType& OutputParameter() const {return outputParameter; }
+ //! Modify the output parameter.
+ OutputDataType& OutputParameter() { return outputParameter; }
//! Get the delta.
- DataType& Delta() const {return delta; }
+ OutputDataType& Delta() const {return delta; }
//! Modify the delta.
- DataType& Delta() { return delta; }
+ OutputDataType& Delta() { return delta; }
+
+ //! Get the gradient.
+ OutputDataType& Gradient() const {return gradient; }
+ //! Modify the gradient.
+ OutputDataType& Gradient() { return gradient; }
private:
+ /*
+ * Calculate the gradient using the output delta (3rd order tensor) and the
+ * input activation (3rd order tensor).
+ *
+ * @param input The input parameter used for calculating the gradient.
+ * @param d The output delta.
+ * @param g The calculated gradient.
+ */
+ template<typename eT>
+ void GradientDelta(const arma::Cube<eT>& input,
+ const arma::Mat<eT>& d,
+ arma::Cube<eT>& g)
+ {
+ g = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
+ arma::Mat<eT> data = arma::Mat<eT>(d.n_cols,
+ input.n_elem / d.n_cols);
+
+ for (size_t s = 0, c = 0; s < input.n_slices /
+ data.n_rows; s++)
+ {
+ for (size_t i = 0; i < data.n_rows; i++, c++)
+ {
+ data.row(i).subvec(s * input.n_rows *
+ input.n_cols, (s + 1) *
+ input.n_rows *
+ input.n_cols - 1) = arma::vectorise(
+ input.slice(c), 1);
+ }
+ }
+
+ g.slice(0) = d * data / d.n_cols;
+ }
+
+ /*
+ * Calculate the gradient (3rd order tensor) using the output delta
+ * (dense matrix) and the input activation (dense matrix).
+ *
+ * @param input The input parameter used for calculating the gradient.
+ * @param d The output delta.
+ * @param g The calculated gradient.
+ */
+ template<typename eT>
+ void GradientDelta(const arma::Mat<eT>& /* input unused */,
+ const arma::Mat<eT>& d,
+ arma::Cube<eT>& g)
+ {
+ g = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
+ Gradient(d, g.slice(0));
+ }
+
+ /*
+ * Calculate the gradient (dense matrix) using the output delta
+ * (dense matrix) and the input activation (3rd order tensor).
+ *
+ * @param input The input parameter used for calculating the gradient.
+ * @param d The output delta.
+ * @param g The calculated gradient.
+ */
+ template<typename eT>
+ void GradientDelta(const arma::Cube<eT>& /* input unused */,
+ const arma::Mat<eT>& d,
+ arma::Mat<eT>& g)
+ {
+ arma::Cube<eT> grad = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
+ Gradient(d, grad);
+ g = grad.slice(0);
+ }
+
+ /*
+ * Calculate the gradient (dense matrix) using the output delta
+ * (dense matrix) and the input activation (dense matrix).
+ *
+ * @param input The input parameter used for calculating the gradient.
+ * @param d The output delta.
+ * @param g The calculated gradient.
+ */
+ template<typename eT>
+ void GradientDelta(const arma::Mat<eT>& input,
+ const arma::Mat<eT>& d,
+ arma::Mat<eT>& g)
+ {
+ g = d * input.t();
+ }
+
//! Locally-stored number of input units.
const size_t inSize;
@@ -142,30 +268,51 @@ class LinearLayer
const size_t outSize;
//! Locally-stored weight object.
- DataType weights;
+ OutputDataType weights;
//! Locally-stored delta object.
- DataType delta;
+ OutputDataType delta;
+
+ //! Locally-stored gradient object.
+ OutputDataType gradient;
+
+ //! Locally-stored input parameter object.
+ InputDataType inputParameter;
- //! Locally-stored parameter object.
- DataType parameter;
+ //! Locally-stored output parameter object.
+ OutputDataType outputParameter;
  //! Locally-stored pointer to the optimizer object.
OptimizerType<LinearLayer<OptimizerType,
WeightInitRule,
- DataType>, DataType>* optimizer;
+ InputDataType,
+ OutputDataType>, OutputDataType>* optimizer;
  //! Parameter that indicates if the class owns an optimizer object.
bool ownsOptimizer;
}; // class LinearLayer
+/**
+ * Linear Mapping layer to map between 3rd order tensors and dense matrices.
+ */
+template <
+ template<typename, typename> class OptimizerType = mlpack::ann::RMSPROP,
+ class WeightInitRule = NguyenWidrowInitialization,
+ typename InputDataType = arma::cube,
+ typename OutputDataType = arma::mat
+>
+using LinearMappingLayer = LinearLayer<
+ OptimizerType, WeightInitRule, InputDataType, OutputDataType>;
+
//! Layer traits for the linear layer.
template<
template<typename, typename> class OptimizerType,
typename WeightInitRule,
- typename DataType
+ typename InputDataType,
+ typename OutputDataType
>
-class LayerTraits<LinearLayer<OptimizerType, WeightInitRule, DataType> >
+class LayerTraits<LinearLayer<
+ OptimizerType, WeightInitRule, InputDataType, OutputDataType> >
{
public:
static const bool IsBinary = false;
More information about the mlpack-git
mailing list