[mlpack-git] master: Add LogSoftmaxLayer class; the log softmax loss layer computes the multinomial logistic loss of the softmax of its inputs. (460e326)

gitdub at mlpack.org gitdub at mlpack.org
Mon Apr 11 10:09:17 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/fb3994c48e59d47b51931d9e7fbfa4777f181ca3...460e326cb2dd1025d45f424bc70967f8d76a4c2f

>---------------------------------------------------------------

commit 460e326cb2dd1025d45f424bc70967f8d76a4c2f
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Mon Apr 11 16:09:17 2016 +0200

    Add LogSoftmaxLayer class; the log softmax loss layer computes the multinomial logistic loss of the softmax of its inputs.
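
    For reference, the quantity the new layer computes for an input vector x
    with components x_i is

        log_softmax(x)_i = x_i - log(sum_j exp(x_j))
                         = (x_i - max(x)) - log(sum_j exp(x_j - max(x))),

    where subtracting max(x) before exponentiating avoids overflow; the
    Forward() implementation in the diff below uses exactly this
    max-subtraction form.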


>---------------------------------------------------------------

460e326cb2dd1025d45f424bc70967f8d76a4c2f
 .../{softmax_layer.hpp => log_softmax_layer.hpp}   | 81 +++++++++++++---------
 1 file changed, 49 insertions(+), 32 deletions(-)

diff --git a/src/mlpack/methods/ann/layer/softmax_layer.hpp b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
similarity index 50%
copy from src/mlpack/methods/ann/layer/softmax_layer.hpp
copy to src/mlpack/methods/ann/layer/log_softmax_layer.hpp
index 151ebfe..de911a1 100644
--- a/src/mlpack/methods/ann/layer/softmax_layer.hpp
+++ b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
@@ -1,11 +1,11 @@
 /**
- * @file softmax_layer.hpp
+ * @file log_softmax_layer.hpp
  * @author Marcus Edel
  *
- * Definition of the SoftmaxLayer class.
+ * Definition of the LogSoftmaxLayer class.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP
+#ifndef __MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
+#define __MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
 
 #include <mlpack/core.hpp>
 
@@ -13,8 +13,11 @@ namespace mlpack {
 namespace ann /** Artificial Neural Network. */ {
 
 /**
- * Implementation of the softmax layer. The softmax loss layer computes the
- * multinomial logistic loss of the softmax of its inputs.
+ * Implementation of the log softmax layer. The log softmax loss layer computes
+ * the multinomial logistic loss of the softmax of its inputs. This layer is
+ * meant to be used in combination with the negative log likelihood layer
+ * (NegativeLogLikelihoodLayer), which expects that the input contains
+ * log-probabilities for each class.  
  *
  * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
  *         arma::sp_mat or arma::cube).
@@ -25,16 +28,13 @@ template <
     typename InputDataType = arma::mat,
     typename OutputDataType = arma::mat
 >
-class SoftmaxLayer
+class LogSoftmaxLayer
 {
  public:
   /**
-   * Create the SoftmaxLayer object.
+   * Create the LogSoftmaxLayer object.
    */
-  SoftmaxLayer()
-  {
-    // Nothing to do here.
-  }  
+  LogSoftmaxLayer() { /* Nothing to do here. */ }
 
   /**
    * Ordinary feed forward pass of a neural network, evaluating the function
@@ -46,9 +46,35 @@ class SoftmaxLayer
   template<typename eT>
   void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
   {
-    output = arma::trunc_exp(input -
-        arma::repmat(arma::max(input), input.n_rows, 1));
-    output /= arma::accu(output);
+    arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1);
+    output = (maxInput - input);
+
+    // Fast polynomial approximation of exp(-x) for x >= 0; the accuracy is
+    // within about 0.00001 of std::exp. Credits go to Leon Bottou.
+    output.transform( [](double x)
+    {
+      //! Fast approximation of exp(-x) for x positive.
+      static const double A0 = 1.0;
+      static const double A1 = 0.125;
+      static const double A2 = 0.0078125;
+      static const double A3 = 0.00032552083;
+      static const double A4 = 1.0172526e-5;
+
+      if (x < 13.0)
+      {
+        double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4)));
+        y *= y;
+        y *= y;
+        y *= y;
+        y = 1 / y;
+
+        return y;
+      }
+
+      return 0.0;
+    } );
+
+    output = input - (maxInput + std::log(arma::accu(output)));
   }
 
   /**
@@ -61,37 +87,28 @@ class SoftmaxLayer
    * @param g The calculated gradient.
    */
   template<typename eT>
-  void Backward(const arma::Mat<eT>& /* unused */,
+  void Backward(const arma::Mat<eT>& input,
                 const arma::Mat<eT>& gy,
                 arma::Mat<eT>& g)
   {
-    g = gy;
+    g = gy - arma::exp(input) * arma::accu(gy);
   }
 
   //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
+  InputDataType& InputParameter() const { return inputParameter; }
   //! Modify the input parameter.
   InputDataType& InputParameter() { return inputParameter; }
 
   //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
+  OutputDataType& OutputParameter() const { return outputParameter; }
   //! Modify the output parameter.
   OutputDataType& OutputParameter() { return outputParameter; }
 
   //! Get the delta.
-  InputDataType const& Delta() const { return delta; }
+  InputDataType& Delta() const { return delta; }
   //! Modify the delta.
   InputDataType& Delta() { return delta; }
 
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& /* ar */, const unsigned int /* version */)
-  {
-    /* Nothing to do here */
-  }
-
  private:
   //! Locally-stored delta object.
   OutputDataType delta;
@@ -101,9 +118,9 @@ class SoftmaxLayer
 
   //! Locally-stored output parameter object.
   OutputDataType outputParameter;
-}; // class SoftmaxLayer
+}; // class LogSoftmaxLayer
 
-} // namespace ann
-} // namespace mlpack
+}; // namespace ann
+}; // namespace mlpack
 
 #endif
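
For readers of the list, here is a minimal, self-contained Armadillo sketch
(not the mlpack API; the function names LogSoftmaxForward and
LogSoftmaxBackward are illustrative only) of the computation the new layer
performs, assuming one sample per column:

#include <armadillo>

// Numerically stable log softmax:
// y_i = (x_i - max(x)) - log(sum_j exp(x_j - max(x))).
void LogSoftmaxForward(const arma::mat& input, arma::mat& output)
{
  // Subtract the column-wise maximum so the exponentials cannot overflow.
  const arma::mat shifted = input -
      arma::repmat(arma::max(input), input.n_rows, 1);
  output = shifted -
      arma::repmat(arma::log(arma::sum(arma::exp(shifted))), input.n_rows, 1);
}

// Gradient of the log softmax: g_i = gy_i - exp(y_i) * sum_j gy_j, written
// here in terms of the log-probabilities y produced by the forward pass;
// for a single column this takes the same form as g = gy - exp(.) * accu(gy)
// in the diff's Backward().
void LogSoftmaxBackward(const arma::mat& y, const arma::mat& gy, arma::mat& g)
{
  g = gy - arma::exp(y) % arma::repmat(arma::sum(gy), gy.n_rows, 1);
}

Feeding the log-probabilities from LogSoftmaxForward() into a negative log
likelihood term reproduces the multinomial logistic loss mentioned in the
commit message, which is why the header documentation pairs this layer with
NegativeLogLikelihoodLayer.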

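The transform lambda in Forward() uses Leon Bottou's fast approximation of
exp(-x): a degree-4 polynomial approximates exp(x / 8), three squarings raise
it to roughly exp(x), and the reciprocal gives exp(-x). A standalone sketch
(illustrative only, not mlpack code) that compares it against std::exp:

#include <cmath>
#include <cstdio>

double FastNegExp(const double x)
{
  // For x >= 13 the true value exp(-x) is below ~2.3e-6, so return 0.
  if (x >= 13.0)
    return 0.0;

  // Degree-4 Taylor polynomial of exp(x / 8).
  double y = 1.0 + x * (0.125 + x * (0.0078125 +
      x * (0.00032552083 + x * 1.0172526e-5)));
  y *= y;  // ~exp(x / 8)^2
  y *= y;  // ~exp(x / 8)^4
  y *= y;  // ~exp(x / 8)^8, i.e. ~exp(x)
  return 1.0 / y;
}

int main()
{
  // Compare against std::exp on a few sample points.
  for (const double x : { 0.5, 2.0, 5.0, 10.0 })
    std::printf("x = %5.2f  fast = %.8f  std = %.8f\n",
        x, FastNegExp(x), std::exp(-x));
  return 0;
}
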



More information about the mlpack-git mailing list