[mlpack-git] master: Avoid overflow by subtracting the maximum of the input values from each input. (d9e984e)

Thu Jul 2 16:36:41 EDT 2015

Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/222a7e191f8a7925a4870ce1acbd68589899dfde...d9e984e1c608679171ad52e8522916703c7b331f

>---------------------------------------------------------------

commit d9e984e1c608679171ad52e8522916703c7b331f
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Thu Jul 2 22:36:34 2015 +0200

    Avoid overflow by subtracting the maximum of the input values from each input.


>---------------------------------------------------------------

d9e984e1c608679171ad52e8522916703c7b331f
 src/mlpack/methods/ann/layer/softmax_layer.hpp | 125 ++++++++++++++++++++-----
 1 file changed, 99 insertions(+), 26 deletions(-)

diff --git a/src/mlpack/methods/ann/layer/softmax_layer.hpp b/src/mlpack/methods/ann/layer/softmax_layer.hpp
index 44e26e3..b47ec05 100644
--- a/src/mlpack/methods/ann/layer/softmax_layer.hpp
+++ b/src/mlpack/methods/ann/layer/softmax_layer.hpp
@@ -16,12 +16,11 @@ namespace ann /** Artificial Neural Network. */ {
 /**
  * An implementation of a standard softmax layer.
  *
- * @tparam MatType Type of data (arma::mat or arma::sp_mat).
- * @tparam VecType Type of data (arma::colvec, arma::mat or arma::sp_mat).
+ * @tparam DataType Type of data (arma::colvec, arma::mat, arma::sp_mat or
+ * arma::cube).
  */
-template <typename MatType = arma::mat, typename VecType = arma::colvec>
+template <typename DataType = arma::colvec>
 class SoftmaxLayer
-
 {
  public:
   /**
@@ -30,9 +29,55 @@ class SoftmaxLayer
    * @param layerSize The number of neurons.
    */
   SoftmaxLayer(const size_t layerSize) :
-      inputActivations(arma::zeros<VecType>(layerSize)),
-      delta(arma::zeros<VecType>(layerSize)),
-      layerSize(layerSize)
+      inputActivations(arma::zeros<DataType>(layerSize)),
+      delta(arma::zeros<DataType>(layerSize)),
+      layerRows(layerSize),
+      layerCols(1),
+      layerSlices(1),
+      outputMaps(1)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Create 2-dimensional SoftmaxLayer object using the specified rows and
+   * columns. In this case, DataType must be arma::mat or arma::sp_mat.
+   *
+   * @param layerRows The number of rows of neurons.
+   * @param layerCols The number of columns of neurons.
+   */
+  SoftmaxLayer(const size_t layerRows, const size_t layerCols) :
+      inputActivations(arma::zeros<DataType>(layerRows, layerCols)),
+      delta(arma::zeros<DataType>(layerRows, layerCols)),
+      layerRows(layerRows),
+      layerCols(layerCols),
+      layerSlices(1),
+      outputMaps(1)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Create n-dimensional SoftmaxLayer object using the specified rows and
+   * columns and number of slices. In this case, DataType must be arma::cube.
+   *
+   * @param layerRows The number of rows of neurons.
+   * @param layerCols The number of columns of neurons.
+   * @param layerSlices The number of slices of neurons.
+   * @param outputMaps The number of output maps.
+   */
+  SoftmaxLayer(const size_t layerRows,
+               const size_t layerCols,
+               const size_t layerSlices,
+               const size_t outputMaps = 1) :
+      inputActivations(arma::zeros<DataType>(layerRows, layerCols,
+          layerSlices * outputMaps)),
+      delta(arma::zeros<DataType>(layerRows, layerCols,
+          layerSlices * outputMaps)),
+      layerRows(layerRows),
+      layerCols(layerCols),
+      layerSlices(layerSlices),
+      outputMaps(outputMaps)
   {
     // Nothing to do here.
   }
@@ -45,9 +90,10 @@ class SoftmaxLayer
    * activity function.
    * @param outputActivation Data to store the resulting output activation.
    */
-  void FeedForward(const VecType& inputActivation, VecType& outputActivation)
+  void FeedForward(const DataType& inputActivation, DataType& outputActivation)
   {
-    outputActivation = arma::trunc_exp(inputActivation);
+    outputActivation = arma::trunc_exp(inputActivation -
+        arma::repmat(arma::max(inputActivation), inputActivation.n_rows, 1));
     outputActivation /= arma::accu(outputActivation);
   }
 
@@ -61,48 +107,75 @@ class SoftmaxLayer
    * @param delta The calculating delta using the partial derivative of the
    * error with respect to a weight.
    */
-  void FeedBackward(const VecType& /* unused */,
-                    const VecType& error,
-                    VecType& delta)
+  void FeedBackward(const DataType& /* unused */,
+                    const DataType& error,
+                    DataType& delta)
   {
     delta = error;
   }
 
   //! Get the input activations.
-  VecType& InputActivation() const { return inputActivations; }
+  DataType& InputActivation() const { return inputActivations; }
   //! Modify the input activations.
-  VecType& InputActivation() { return inputActivations; }
+  DataType& InputActivation() { return inputActivations; }
 
   //! Get the detla.
-  VecType& Delta() const { return delta; }
+  DataType& Delta() const { return delta; }
   //! Modify the delta.
-  VecType& Delta() { return delta; }
+  DataType& Delta() { return delta; }
 
   //! Get input size.
-  size_t InputSize() const { return layerSize; }
+  size_t InputSize() const { return layerRows; }
   //! Modify the delta.
-  size_t& InputSize() { return layerSize; }
+  size_t& InputSize() { return layerRows; }
 
   //! Get output size.
-  size_t OutputSize() const { return layerSize; }
+  size_t OutputSize() const { return layerRows; }
   //! Modify the output size.
-  size_t& OutputSize() { return layerSize; }
+  size_t& OutputSize() { return layerRows; }
+
+  //! Get the number of layer rows.
+  size_t LayerRows() const { return layerRows; }
+  //! Modify the number of layer rows.
+  size_t& LayerRows() { return layerRows; }
+
+  //! Get the number of layer columns.
+  size_t LayerCols() const { return layerCols; }
+  //! Modify the number of layer columns.
+  size_t& LayerCols() { return layerCols; }
 
   //! Get the number of layer slices.
-  size_t LayerSlices() const { return 1; }
+  size_t LayerSlices() const { return layerSlices; }
 
   //! Get the number of output maps.
-  size_t OutputMaps() const { return 1; }
+  size_t OutputMaps() const { return outputMaps; }
+
+  //! Get the value of the deterministic parameter.
+  bool Deterministic() const {return deterministic; }
+  //! Modify the value of the deterministic parameter.
+  bool& Deterministic() {return deterministic; }
 
  private:
   //! Locally-stored input activation object.
-  VecType inputActivations;
+  DataType inputActivations;
 
   //! Locally-stored delta object.
-  VecType delta;
+  DataType delta;
+
+  //! Locally-stored number of layer rows.
+  size_t layerRows;
+
+  //! Locally-stored number of layer cols.
+  size_t layerCols;
+
+  //! Locally-stored number of layer slices.
+  size_t layerSlices;
+
+  //! Locally-stored number of output maps.
+  size_t outputMaps;
 
-  //! Locally-stored number of neurons.
-  size_t layerSize;
+  //! Locally-stored deterministic parameter.
+  bool deterministic;
 }; // class SoftmaxLayer
 
 }; // namespace ann