[mlpack-git] master: Remove unused ann optimizer; For more information take a look at #555. (74d499a)

Fri Mar 18 11:44:26 EDT 2016

Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/5641b4de32a0616b62fbf4e220f09c0d0f257d7a...74d499aa051a48f006f2a2cac18fce5e5d007164

>---------------------------------------------------------------

commit 74d499aa051a48f006f2a2cac18fce5e5d007164
Author: marcus <marcus.edel at fu-berlin.de>
Date:   Fri Mar 18 16:44:26 2016 +0100

    Remove unused ann optimizer; For more information take a look at #555.


>---------------------------------------------------------------

74d499aa051a48f006f2a2cac18fce5e5d007164
 src/mlpack/methods/ann/CMakeLists.txt              |   1 -
 src/mlpack/methods/ann/optimizer/CMakeLists.txt    |  17 --
 src/mlpack/methods/ann/optimizer/ada_delta.hpp     | 175 --------------------
 src/mlpack/methods/ann/optimizer/adam.hpp          | 177 ---------------------
 src/mlpack/methods/ann/optimizer/rmsprop.hpp       | 163 -------------------
 .../methods/ann/optimizer/steepest_descent.hpp     | 146 -----------------
 6 files changed, 679 deletions(-)

diff --git a/src/mlpack/methods/ann/CMakeLists.txt b/src/mlpack/methods/ann/CMakeLists.txt
index 7dd9892..c62459a 100644
--- a/src/mlpack/methods/ann/CMakeLists.txt
+++ b/src/mlpack/methods/ann/CMakeLists.txt
@@ -23,7 +23,6 @@ set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
 add_subdirectory(activation_functions)
 add_subdirectory(init_rules)
 add_subdirectory(layer)
-add_subdirectory(optimizer)
 add_subdirectory(performance_functions)
 add_subdirectory(trainer)
 add_subdirectory(pooling_rules)
diff --git a/src/mlpack/methods/ann/optimizer/CMakeLists.txt b/src/mlpack/methods/ann/optimizer/CMakeLists.txt
deleted file mode 100644
index 53b877e..0000000
--- a/src/mlpack/methods/ann/optimizer/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  ada_delta.hpp
-  adam.hpp
-  rmsprop.hpp
-  steepest_descent.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/ann/optimizer/ada_delta.hpp b/src/mlpack/methods/ann/optimizer/ada_delta.hpp
deleted file mode 100644
index 76dc195..0000000
--- a/src/mlpack/methods/ann/optimizer/ada_delta.hpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * @file ada_delta.hpp
- * @author Marcus Edel
- *
- * Implementation of the Adadelta optimizer. Adadelta is an optimizer that
- * dynamically adapts over time using only first order information.
- * Additionally, Adadelta requires no manual tuning of a learning rate.
- */
-#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_ADA_DELTA_HPP
-#define __MLPACK_METHODS_ANN_OPTIMIZER_ADA_DELTA_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Adadelta is an optimizer that uses two ideas to improve upon the two main
- * drawbacks of the Adagrad method:
- *
- *  - Accumulate Over Window
- *  - Correct Units with Hessian Approximation
- *
- * For more information, see the following.
- *
- * @code
- * @article{Zeiler2012,
- *   author    = {Matthew D. Zeiler},
- *   title     = {{ADADELTA:} An Adaptive Learning Rate Method},
- *   journal   = {CoRR},
- *   year      = {2012}
- * }
- * @endcode
- */
-template<typename DecomposableFunctionType, typename DataType>
-class AdaDelta
-{
- public:
-  /**
-   * Construct the AdaDelta optimizer with the given function and parameters.
-   *
-   * @param function Function to be optimized (minimized).
-   * @param rho Constant interpolation parameter similar to that used in
-   *        Momentum methods.
-   * @param eps The eps coefficient to avoid division by zero (numerical
-   *        stability).
-   */
-  AdaDelta(DecomposableFunctionType& function,
-          const double rho = 0.95,
-          const double eps = 1e-6) :
-      function(function),
-      rho(rho),
-      eps(eps)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Optimize the given function using AdaDelta.
-   */
-  void Optimize()
-  {
-    if (meanSquaredGradient.n_elem == 0)
-    {
-      meanSquaredGradient = function.Weights();
-      meanSquaredGradient.zeros();
-
-      meanSquaredGradientDx = meanSquaredGradient;
-    }
-
-    Optimize(function.Weights(), gradient, meanSquaredGradient,
-        meanSquaredGradientDx);
-  }
-
-  /*
-   * Sum up all gradients and store the results in the gradients storage.
-   */
-  void Update()
-  {
-    if (gradient.n_elem != 0)
-    {
-      gradient += function.Gradient();
-    }
-    else
-    {
-      gradient = function.Gradient();
-    }
-  }
-
-  /*
-   * Reset the gradient storage.
-   */
-  void Reset()
-  {
-    gradient.zeros();
-  }
-
-  //! Get the gradient.
-  DataType& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  DataType& Gradient() { return gradient; }
-
- private:
-  /**
-   * Optimize the given function using AdaDelta.
-   *
-   * @param weights The weights that should be updated.
-   * @param gradient The gradient used to update the weights.
-   * @param meanSquaredGradient The current mean squared gradient.
-   * @param meanSquaredGradientDx The current mean squared Dx gradient.
-   */
-  template<typename eT>
-  void Optimize(arma::Cube<eT>& weights,
-                arma::Cube<eT>& gradient,
-                arma::Cube<eT>& meanSquaredGradient,
-                arma::Cube<eT>& meanSquaredGradientDx)
-  {
-    for (size_t s = 0; s < weights.n_slices; s++)
-    {
-      Optimize(weights.slice(s), gradient.slice(s), meanSquaredGradient.slice(s),
-          meanSquaredGradientDx.slice(s));
-    }
-  }
-
-  /**
-   * Optimize the given function using AdaDelta.
-   *
-   * @param weights The weights that should be updated.
-   * @param gradient The gradient used to update the weights.
-   * @param meanSquaredGradient The current mean squared gradient.
-   * @param meanSquaredGradientDx The current mean squared Dx gradient.
-   */
-  template<typename eT>
-  void Optimize(arma::Mat<eT>& weights,
-                arma::Mat<eT>& gradient,
-                arma::Mat<eT>& meanSquaredGradient,
-                arma::Mat<eT>& meanSquaredGradientDx)
-  {
-    // Accumulate gradient.
-    meanSquaredGradient *= rho;
-    meanSquaredGradient += (1 - rho) * (gradient % gradient);
-    arma::Mat<eT> dx = arma::sqrt((meanSquaredGradientDx + eps) /
-        (meanSquaredGradient + eps)) % gradient;
-
-    // Accumulate updates.
-    meanSquaredGradientDx *= rho;
-    meanSquaredGradientDx += (1 - rho) * (dx % dx);
-
-    // Apply update.
-    weights -= dx;
-  }
-
-  //! The instantiated function.
-  DecomposableFunctionType& function;
-
-  //! The value used as interpolation parameter.
-  const double rho;
-
-  //! The value used as eps.
-  const double eps;
-
-  //! The current gradient.
-  DataType gradient;
-
-  //! The current mean squared gradient.
-  DataType meanSquaredGradient;
-
-  //! The current mean squared update (dx).
-  DataType meanSquaredGradientDx;
-}; // class AdaDelta
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/optimizer/adam.hpp b/src/mlpack/methods/ann/optimizer/adam.hpp
deleted file mode 100644
index 3240207..0000000
--- a/src/mlpack/methods/ann/optimizer/adam.hpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/**
- * @file adam.hpp
- * @author Marcus Edel
- *
- * Implementation of the Adam optimizer. Adam is an algorithm for first-
- * order gradient-based optimization of stochastic objective functions, based on
- * adaptive estimates of lower-order moments.
- */
-#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_ADAM_HPP
-#define __MLPACK_METHODS_ANN_OPTIMIZER_ADAM_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Adam is an optimizer that computes individual adaptive learning rates for
- * different parameters from estimates of first and second moments of the
- * gradients.
- *
- * For more information, see the following.
- *
- * @code
- * @article{Kingma2014,
- *   author    = {Diederik P. Kingma and Jimmy Ba},
- *   title     = {Adam: {A} Method for Stochastic Optimization},
- *   journal   = {CoRR},
- *   year      = {2014}
- * }
- * @endcode
- */
-template<typename DecomposableFunctionType, typename DataType>
-class Adam
-{
- public:
-  /**
-   * Construct the Adam optimizer with the given function and parameters.
-   *
-   * @param function Function to be optimized (minimized).
-   * @param lr The learning rate coefficient.
-   * @param beta1 The first moment coefficient.
-   * @param beta2 The second moment coefficient.
-   * @param eps The eps coefficient to avoid division by zero (numerical
-   *        stability).
-   */
-  Adam(DecomposableFunctionType& function,
-          const double lr = 0.001,
-          const double beta1 = 0.9,
-          const double beta2 = 0.999,
-          const double eps = 1e-8) :
-      function(function),
-      lr(lr),
-      beta1(beta1),
-      beta2(beta2),
-      eps(eps)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Optimize the given function using Adam.
-   */
-  void Optimize()
-  {
-    if (mean.n_elem == 0)
-    {
-      mean = function.Weights();
-      mean.zeros();
-
-      variance = mean;
-    }
-
-    Optimize(function.Weights(), gradient, mean, variance);
-  }
-
-  /*
-   * Sum up all gradients and store the results in the gradients storage.
-   */
-  void Update()
-  {
-    if (gradient.n_elem != 0)
-    {
-      gradient += function.Gradient();
-    }
-    else
-    {
-      gradient = function.Gradient();
-    }
-  }
-
-  /*
-   * Reset the gradient storage.
-   */
-  void Reset()
-  {
-    gradient.zeros();
-  }
-
-  //! Get the gradient.
-  DataType& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  DataType& Gradient() { return gradient; }
-
- private:
-  /**
-   * Optimize the given function using Adam.
-   *
-   * @param weights The weights that should be updated.
-   * @param gradient The gradient used to update the weights.
-   * @param mean The current mean parameter.
-   * @param variance The current variance parameter.
-   */
-  template<typename eT>
-  void Optimize(arma::Cube<eT>& weights,
-                arma::Cube<eT>& gradient,
-                arma::Cube<eT>& mean,
-                arma::Cube<eT>& variance)
-  {
-    for (size_t s = 0; s < weights.n_slices; s++)
-    {
-      Optimize(weights.slice(s), gradient.slice(s), mean.slice(s),
-          variance.slice(s));
-    }
-  }
-
-  /**
-   * Optimize the given function using Adam.
-   *
-   * @param weights The weights that should be updated.
-   * @param gradient The gradient used to update the weights.
-   * @param mean The current mean parameter.
-   * @param variance The current variance parameter.
-   */
-  template<typename eT>
-  void Optimize(arma::Mat<eT>& weights,
-                arma::Mat<eT>& gradient,
-                arma::Mat<eT>& mean,
-                arma::Mat<eT>& variance)
-  {
-    // Accumulate updates.
-    mean += (1 - beta1) * (gradient - mean);
-    variance += (1 - beta2) * (gradient % gradient - variance);
-
-    // Apply update.
-    weights -= lr * mean / (arma::sqrt(variance) + eps);
-  }
-
-  //! The instantiated function.
-  DecomposableFunctionType& function;
-
-  //! The value used as learning rate.
-  const double lr;
-
-  //! The value used as first moment coefficient.
-  const double beta1;
-
-  //! The value used as second moment coefficient.
-  const double beta2;
-
-  //! The value used as eps.
-  const double eps;
-
-  //! The current gradient.
-  DataType gradient;
-
-  //! The current mean parameter.
-  DataType mean;
-
-  //! The current variance parameter.
-  DataType variance;
-}; // class Adam
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/optimizer/rmsprop.hpp b/src/mlpack/methods/ann/optimizer/rmsprop.hpp
deleted file mode 100644
index 6fcde44..0000000
--- a/src/mlpack/methods/ann/optimizer/rmsprop.hpp
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- * @file rmsprop.hpp
- * @author Marcus Edel
- *
- * Implementation of the RmsProp optimizer. RmsProp is an optimizer that
- * utilizes the magnitude of recent gradients to normalize the gradients.
- */
-#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_RMSPROP_HPP
-#define __MLPACK_METHODS_ANN_OPTIMIZER_RMSPROP_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * RmsProp is an optimizer that utilizes the magnitude of recent gradients to
- * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$
- * and a decay term \f$ \alpha \f$ we perform the following updates:
- *
- * \f{eqnarray*}{
- * r_t &=& (1 - \alpha) f'(\Delta_t)^2 + \alpha r_{t - 1} \\
- * v_{t + 1} &=& \frac{\gamma}{\sqrt{r_t}}f'(\Delta_t) \\
- * \Delta_{t + 1} &=& \Delta_t - v_{t + 1}
- * \f}
- *
- * For more information, see the following.
- *
- * @code
- * @misc{tieleman2012,
- *   title={Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine
- *   Learning},
- *   year={2012}
- * }
- * @endcode
- */
-template<typename DecomposableFunctionType, typename DataType>
-class RMSPROP
-{
- public:
-  /**
-   * Construct the RMSPROP optimizer with the given function and parameters.
-   *
-   * @param function Function to be optimized (minimized).
-   * @param lr The learning rate coefficient.
-   * @param alpha Constant similar to that used in AdaDelta and Momentum methods.
-   * @param eps The eps coefficient to avoid division by zero (numerical
-   *        stability).
-   */
-  RMSPROP(DecomposableFunctionType& function,
-          const double lr = 0.01,
-          const double alpha = 0.99,
-          const double eps = 1e-8) :
-      function(function),
-      lr(lr),
-      alpha(alpha),
-      eps(eps)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Optimize the given function using RmsProp.
-   */
-  void Optimize()
-  {
-    if (meanSquaredGad.n_elem == 0)
-    {
-      meanSquaredGad = function.Weights();
-      meanSquaredGad.zeros();
-    }
-
-    Optimize(function.Weights(), gradient, meanSquaredGad);
-  }
-
-  /*
-   * Sum up all gradients and store the results in the gradients storage.
-   */
-  void Update()
-  {
-    if (gradient.n_elem != 0)
-    {
-      DataType outputGradient = function.Gradient();
-      gradient += outputGradient;
-    }
-    else
-    {
-      gradient = function.Gradient();
-    }
-  }
-
-  /*
-   * Reset the gradient storage.
-   */
-  void Reset()
-  {
-    gradient.zeros();
-  }
-
-  //! Get the gradient.
-  DataType& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  DataType& Gradient() { return gradient; }
-
- private:
-  /**
-   * Optimize the given function using RmsProp.
-   *
-   * @param weights The weights that should be updated.
-   * @param gradient The gradient used to update the weights.
-   * @param meanSquaredGradient The moving average over the root mean squared
-   *    gradient used to update the weights.
-   */
-  template<typename eT>
-  void Optimize(arma::Cube<eT>& weights,
-                arma::Cube<eT>& gradient,
-                arma::Cube<eT>& meanSquaredGradient)
-  {
-    for (size_t s = 0; s < weights.n_slices; s++)
-      Optimize(weights.slice(s), gradient.slice(s), meanSquaredGradient.slice(s));
-  }
-
-  /**
-   * Optimize the given function using RmsProp.
-   *
-   * @param weights The weights that should be updated.
-   * @param gradient The gradient used to update the weights.
-   * @param meanSquaredGradient The moving average over the root mean squared
-   *    gradient used to update the weights.
-   */
-  template<typename eT>
-  void Optimize(arma::Mat<eT>& weights,
-                arma::Mat<eT>& gradient,
-                arma::Mat<eT>& meanSquaredGradient)
-  {
-    meanSquaredGradient *= alpha;
-    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
-    weights -= lr * gradient / (arma::sqrt(meanSquaredGradient) + eps);
-  }
-
-  //! The instantiated function.
-  DecomposableFunctionType& function;
-
-  //! The value used as learning rate.
-  const double lr;
-
-  //! The value used as alpha
-  const double alpha;
-
-  //! The value used as eps.
-  const double eps;
-
-  //! The current moving average of the squared gradients.
-  DataType meanSquaredGad;
-
-  //! The current gradient.
-  DataType gradient;
-}; // class RMSPROP
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/optimizer/steepest_descent.hpp b/src/mlpack/methods/ann/optimizer/steepest_descent.hpp
deleted file mode 100644
index 3d5f927..0000000
--- a/src/mlpack/methods/ann/optimizer/steepest_descent.hpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * @file steepest_descent.hpp
- * @author Marcus Edel
- *
- * Implementation of the steepest descent optimizer. The method of steepest
- * descent, also called the gradient descent method, is used to find the
- * nearest local minimum of a function with the assumption that the gradient of
- * the function can be computed.
- */
-#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_STEEPEST_DESCENT_HPP
-#define __MLPACK_METHODS_ANN_OPTIMIZER_STEEPEST_DESCENT_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * This class is used to update the weights using steepest descent.
- *
- * @tparam DataType Type of input data (should be arma::mat,
- * arma::spmat or arma::cube).
- */
-template<typename DecomposableFunctionType, typename DataType>
-class SteepestDescent
-{
- public:
-  /**
-   * Construct the SteepestDescent optimizer with the given function and
-   * parameters.
-   *
-   * @param function Function to be optimized (minimized).
-   * @param lr The learning rate coefficient.
-   * @param mom The momentum coefficient.
-   */
-  SteepestDescent(DecomposableFunctionType& function,
-                  const double lr = 0.5,
-                  const double mom = 0) :
-      function(function),
-      lr(lr),
-      mom(mom)
-
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Optimize the given function using steepest descent.
-   */
-  void Optimize()
-  {
-    if (momWeights.n_elem == 0)
-    {
-      momWeights = function.Weights();
-      momWeights.zeros();
-    }
-
-    Optimize(function.Weights(), gradient, momWeights);
-  }
-
-  /*
-   * Sum up all gradients and store the results in the gradients storage.
-   */
-  void Update()
-  {
-    if (gradient.n_elem != 0)
-    {
-      DataType outputGradient = function.Gradient();
-      gradient += outputGradient;
-    }
-    else
-    {
-      gradient = function.Gradient();
-    }
-  }
-
-  /*
-   * Reset the gradient storage.
-   */
-  void Reset()
-  {
-    gradient.zeros();
-  }
-
- private:
-  /** Optimize the given function using steepest descent.
-   *
-   * @param weights The weights that should be updated.
-   * @param gradient The gradient used to update the weights.
-   * @param momWeights The accumulated momentum term that is subtracted from
-   *    the weights.
-   */
-  template<typename eT>
-  void Optimize(arma::Cube<eT>& weights,
-                arma::Cube<eT>& gradient,
-                arma::Cube<eT>& momWeights)
-  {
-    for (size_t s = 0; s < weights.n_slices; s++)
-      Optimize(weights.slice(s), gradient.slice(s), momWeights.slice(s));
-  }
-
-  /**
-   * Optimize the given function using steepest descent.
-   *
-   * @param weights The weights that should be updated.
-   * @param gradient The gradient used to update the weights.
-   * @param momWeights The accumulated momentum term that is subtracted from
-   *    the weights.
-   */
-  template<typename eT>
-  void Optimize(arma::Mat<eT>& weights,
-                arma::Mat<eT>& gradient,
-                arma::Mat<eT>& momWeights)
-  {
-    if (mom > 0)
-    {
-      momWeights *= mom;
-      momWeights += (lr * gradient);
-      weights -= momWeights;
-    }
-    else
-    {
-      weights -= lr * gradient;
-    }
-  }
-
-  //! The instantiated function.
-  DecomposableFunctionType& function;
-
-  //! The value used as learning rate.
-  const double lr;
-
-  //! The value used as momentum.
-  const double mom;
-
-  //! Momentum matrix.
-  DataType momWeights;
-
-  //! The current gradient.
-  DataType gradient;
-}; // class SteepestDescent
-
-} // namespace ann
-} // namespace mlpack
-
-#endif