[mlpack-git] master: Remove unused ann optimizer; For more information take a look at #555. (74d499a)
gitdub at mlpack.org
gitdub at mlpack.org
Fri Mar 18 11:44:26 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/5641b4de32a0616b62fbf4e220f09c0d0f257d7a...74d499aa051a48f006f2a2cac18fce5e5d007164
>---------------------------------------------------------------
commit 74d499aa051a48f006f2a2cac18fce5e5d007164
Author: marcus <marcus.edel at fu-berlin.de>
Date: Fri Mar 18 16:44:26 2016 +0100
Remove unused ann optimizer; For more information take a look at #555.
>---------------------------------------------------------------
74d499aa051a48f006f2a2cac18fce5e5d007164
src/mlpack/methods/ann/CMakeLists.txt | 1 -
src/mlpack/methods/ann/optimizer/CMakeLists.txt | 17 --
src/mlpack/methods/ann/optimizer/ada_delta.hpp | 175 --------------------
src/mlpack/methods/ann/optimizer/adam.hpp | 177 ---------------------
src/mlpack/methods/ann/optimizer/rmsprop.hpp | 163 -------------------
.../methods/ann/optimizer/steepest_descent.hpp | 146 -----------------
6 files changed, 679 deletions(-)
diff --git a/src/mlpack/methods/ann/CMakeLists.txt b/src/mlpack/methods/ann/CMakeLists.txt
index 7dd9892..c62459a 100644
--- a/src/mlpack/methods/ann/CMakeLists.txt
+++ b/src/mlpack/methods/ann/CMakeLists.txt
@@ -23,7 +23,6 @@ set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
add_subdirectory(activation_functions)
add_subdirectory(init_rules)
add_subdirectory(layer)
-add_subdirectory(optimizer)
add_subdirectory(performance_functions)
add_subdirectory(trainer)
add_subdirectory(pooling_rules)
diff --git a/src/mlpack/methods/ann/optimizer/CMakeLists.txt b/src/mlpack/methods/ann/optimizer/CMakeLists.txt
deleted file mode 100644
index 53b877e..0000000
--- a/src/mlpack/methods/ann/optimizer/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
- ada_delta.hpp
- adam.hpp
- rmsprop.hpp
- steepest_descent.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
- set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/ann/optimizer/ada_delta.hpp b/src/mlpack/methods/ann/optimizer/ada_delta.hpp
deleted file mode 100644
index 76dc195..0000000
--- a/src/mlpack/methods/ann/optimizer/ada_delta.hpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * @file ada_delta.hpp
- * @author Marcus Edel
- *
- * Implementation of the Adadelta optimizer. Adadelta is an optimizer that
- * dynamically adapts over time using only first order information.
- * Additionally, Adadelta requires no manual tuning of a learning rate.
- */
-#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_ADA_DELTA_HPP
-#define __MLPACK_METHODS_ANN_OPTIMIZER_ADA_DELTA_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Adadelta is an optimizer that uses two ideas to improve upon the two main
- * drawbacks of the Adagrad method:
- *
- * - Accumulate Over Window
- * - Correct Units with Hessian Approximation
- *
- * For more information, see the following.
- *
- * @code
- * @article{Zeiler2012,
- * author = {Matthew D. Zeiler},
- * title = {{ADADELTA:} An Adaptive Learning Rate Method},
- * journal = {CoRR},
- * year = {2012}
- * }
- * @endcode
- */
-template<typename DecomposableFunctionType, typename DataType>
-class AdaDelta
-{
- public:
- /**
- * Construct the AdaDelta optimizer with the given function and parameters.
- *
- * @param function Function to be optimized (minimized).
- * @param rho Constant interpolation parameter similar to that used in
- * Momentum methods.
- * @param eps The eps coefficient to avoid division by zero (numerical
- * stability).
- */
- AdaDelta(DecomposableFunctionType& function,
- const double rho = 0.95,
- const double eps = 1e-6) :
- function(function),
- rho(rho),
- eps(eps)
- {
- // Nothing to do here.
- }
-
- /**
- * Optimize the given function using AdaDelta.
- */
- void Optimize()
- {
- if (meanSquaredGradient.n_elem == 0)
- {
- meanSquaredGradient = function.Weights();
- meanSquaredGradient.zeros();
-
- meanSquaredGradientDx = meanSquaredGradient;
- }
-
- Optimize(function.Weights(), gradient, meanSquaredGradient,
- meanSquaredGradientDx);
- }
-
- /*
- * Sum up all gradients and store the results in the gradients storage.
- */
- void Update()
- {
- if (gradient.n_elem != 0)
- {
- gradient += function.Gradient();
- }
- else
- {
- gradient = function.Gradient();
- }
- }
-
- /*
- * Reset the gradient storage.
- */
- void Reset()
- {
- gradient.zeros();
- }
-
- //! Get the gradient.
- DataType& Gradient() const { return gradient; }
- //! Modify the gradient.
- DataType& Gradient() { return gradient; }
-
- private:
- /**
- * Optimize the given function using AdaDelta.
- *
- * @param weights The weights that should be updated.
- * @param gradient The gradient used to update the weights.
- * @param meanSquaredGradient The current mean squared gradient.
- * @param meanSquaredGradientDx The current mean squared Dx gradient.
- */
- template<typename eT>
- void Optimize(arma::Cube<eT>& weights,
- arma::Cube<eT>& gradient,
- arma::Cube<eT>& meanSquaredGradient,
- arma::Cube<eT>& meanSquaredGradientDx)
- {
- for (size_t s = 0; s < weights.n_slices; s++)
- {
- Optimize(weights.slice(s), gradient.slice(s), meanSquaredGradient.slice(s),
- meanSquaredGradientDx.slice(s));
- }
- }
-
- /**
- * Optimize the given function using AdaDelta.
- *
- * @param weights The weights that should be updated.
- * @param gradient The gradient used to update the weights.
- * @param meanSquaredGradient The current mean squared gradient.
- * @param meanSquaredGradientDx The current mean squared Dx gradient.
- */
- template<typename eT>
- void Optimize(arma::Mat<eT>& weights,
- arma::Mat<eT>& gradient,
- arma::Mat<eT>& meanSquaredGradient,
- arma::Mat<eT>& meanSquaredGradientDx)
- {
- // Accumulate gradient.
- meanSquaredGradient *= rho;
- meanSquaredGradient += (1 - rho) * (gradient % gradient);
- arma::Mat<eT> dx = arma::sqrt((meanSquaredGradientDx + eps) /
- (meanSquaredGradient + eps)) % gradient;
-
- // Accumulate updates.
- meanSquaredGradientDx *= rho;
- meanSquaredGradientDx += (1 - rho) * (dx % dx);
-
- // Apply update.
- weights -= dx;
- }
-
- //! The instantiated function.
- DecomposableFunctionType& function;
-
- //! The value used as interpolation parameter.
- const double rho;
-
- //! The value used as eps.
- const double eps;
-
- //! The current gradient.
- DataType gradient;
-
- //! The current mean squared gradient.
- DataType meanSquaredGradient;
-
- //! The current mean squared update (Dx).
- DataType meanSquaredGradientDx;
-}; // class AdaDelta
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/optimizer/adam.hpp b/src/mlpack/methods/ann/optimizer/adam.hpp
deleted file mode 100644
index 3240207..0000000
--- a/src/mlpack/methods/ann/optimizer/adam.hpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/**
- * @file adam.hpp
- * @author Marcus Edel
- *
- * Implementation of the Adam optimizer. Adam is an algorithm for first-
- * order gradient-based optimization of stochastic objective functions, based on
- * adaptive estimates of lower-order moments.
- */
-#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_ADAM_HPP
-#define __MLPACK_METHODS_ANN_OPTIMIZER_ADAM_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Adam is an optimizer that computes individual adaptive learning rates for
- * different parameters from estimates of first and second moments of the
- * gradients.
- *
- * For more information, see the following.
- *
- * @code
- * @article{Kingma2014,
- * author = {Diederik P. Kingma and Jimmy Ba},
- * title = {Adam: {A} Method for Stochastic Optimization},
- * journal = {CoRR},
- * year = {2014}
- * }
- * @endcode
- */
-template<typename DecomposableFunctionType, typename DataType>
-class Adam
-{
- public:
- /**
- * Construct the Adam optimizer with the given function and parameters.
- *
- * @param function Function to be optimized (minimized).
- * @param lr The learning rate coefficient.
- * @param beta1 The first moment coefficient.
- * @param beta2 The second moment coefficient.
- * @param eps The eps coefficient to avoid division by zero (numerical
- * stability).
- */
- Adam(DecomposableFunctionType& function,
- const double lr = 0.001,
- const double beta1 = 0.9,
- const double beta2 = 0.999,
- const double eps = 1e-8) :
- function(function),
- lr(lr),
- beta1(beta1),
- beta2(beta2),
- eps(eps)
- {
- // Nothing to do here.
- }
-
- /**
- * Optimize the given function using Adam.
- */
- void Optimize()
- {
- if (mean.n_elem == 0)
- {
- mean = function.Weights();
- mean.zeros();
-
- variance = mean;
- }
-
- Optimize(function.Weights(), gradient, mean, variance);
- }
-
- /*
- * Sum up all gradients and store the results in the gradients storage.
- */
- void Update()
- {
- if (gradient.n_elem != 0)
- {
- gradient += function.Gradient();
- }
- else
- {
- gradient = function.Gradient();
- }
- }
-
- /*
- * Reset the gradient storage.
- */
- void Reset()
- {
- gradient.zeros();
- }
-
- //! Get the gradient.
- DataType& Gradient() const { return gradient; }
- //! Modify the gradient.
- DataType& Gradient() { return gradient; }
-
- private:
- /**
- * Optimize the given function using Adam.
- *
- * @param weights The weights that should be updated.
- * @param gradient The gradient used to update the weights.
- * @param mean The current mean parameter.
- * @param variance The current variance parameter.
- */
- template<typename eT>
- void Optimize(arma::Cube<eT>& weights,
- arma::Cube<eT>& gradient,
- arma::Cube<eT>& mean,
- arma::Cube<eT>& variance)
- {
- for (size_t s = 0; s < weights.n_slices; s++)
- {
- Optimize(weights.slice(s), gradient.slice(s), mean.slice(s),
- variance.slice(s));
- }
- }
-
- /**
- * Optimize the given function using Adam.
- *
- * @param weights The weights that should be updated.
- * @param gradient The gradient used to update the weights.
- * @param mean The current mean parameter.
- * @param variance The current variance parameter.
- */
- template<typename eT>
- void Optimize(arma::Mat<eT>& weights,
- arma::Mat<eT>& gradient,
- arma::Mat<eT>& mean,
- arma::Mat<eT>& variance)
- {
- // Accumulate updates.
- mean += (1 - beta1) * (gradient - mean);
- variance += (1 - beta2) * (gradient % gradient - variance);
-
- // Apply update.
- weights -= lr * mean / (arma::sqrt(variance) + eps);
- }
-
- //! The instantiated function.
- DecomposableFunctionType& function;
-
- //! The value used as learning rate.
- const double lr;
-
- //! The value used as first moment coefficient.
- const double beta1;
-
- //! The value used as second moment coefficient.
- const double beta2;
-
- //! The value used as eps.
- const double eps;
-
- //! The current gradient.
- DataType gradient;
-
- //! The current mean parameter.
- DataType mean;
-
- //! The current variance parameter.
- DataType variance;
-}; // class Adam
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/optimizer/rmsprop.hpp b/src/mlpack/methods/ann/optimizer/rmsprop.hpp
deleted file mode 100644
index 6fcde44..0000000
--- a/src/mlpack/methods/ann/optimizer/rmsprop.hpp
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- * @file rmsprop.hpp
- * @author Marcus Edel
- *
- * Implementation of the RmsProp optimizer. RmsProp is an optimizer that
- * utilizes the magnitude of recent gradients to normalize the gradients.
- */
-#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_RMSPROP_HPP
-#define __MLPACK_METHODS_ANN_OPTIMIZER_RMSPROP_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * RmsProp is an optimizer that utilizes the magnitude of recent gradients to
- * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$
- * and a decay term \f$ \alpha \f$ we perform the following updates:
- *
- * \f{eqnarray*}{
- * r_t &=& (1 - \gamma) f'(\Delta_t)^2 + \gamma r_{t - 1} \\
- * v_{t + 1} &=& \frac{\alpha}{\sqrt{r_t}}f'(\Delta_t) \\
- * \Delta_{t + 1} &=& \Delta_t - v_{t + 1}
- * \f}
- *
- * For more information, see the following.
- *
- * @code
- * @misc{tieleman2012,
- * title={Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine
- * Learning},
- * year={2012}
- * }
- * @endcode
- */
-template<typename DecomposableFunctionType, typename DataType>
-class RMSPROP
-{
- public:
- /**
- * Construct the RMSPROP optimizer with the given function and parameters.
- *
- * @param function Function to be optimized (minimized).
- * @param lr The learning rate coefficient.
- * @param alpha Constant similar to that used in AdaDelta and Momentum methods.
- * @param eps The eps coefficient to avoid division by zero (numerical
- * stability).
- */
- RMSPROP(DecomposableFunctionType& function,
- const double lr = 0.01,
- const double alpha = 0.99,
- const double eps = 1e-8) :
- function(function),
- lr(lr),
- alpha(alpha),
- eps(eps)
- {
- // Nothing to do here.
- }
-
- /**
- * Optimize the given function using RmsProp.
- */
- void Optimize()
- {
- if (meanSquaredGad.n_elem == 0)
- {
- meanSquaredGad = function.Weights();
- meanSquaredGad.zeros();
- }
-
- Optimize(function.Weights(), gradient, meanSquaredGad);
- }
-
- /*
- * Sum up all gradients and store the results in the gradients storage.
- */
- void Update()
- {
- if (gradient.n_elem != 0)
- {
- DataType outputGradient = function.Gradient();
- gradient += outputGradient;
- }
- else
- {
- gradient = function.Gradient();
- }
- }
-
- /*
- * Reset the gradient storage.
- */
- void Reset()
- {
- gradient.zeros();
- }
-
- //! Get the gradient.
- DataType& Gradient() const { return gradient; }
- //! Modify the gradient.
- DataType& Gradient() { return gradient; }
-
- private:
- /**
- * Optimize the given function using RmsProp.
- *
- * @param weights The weights that should be updated.
- * @param gradient The gradient used to update the weights.
- * @param meanSquaredGradient The moving average over the root mean squared
- * gradient used to update the weights.
- */
- template<typename eT>
- void Optimize(arma::Cube<eT>& weights,
- arma::Cube<eT>& gradient,
- arma::Cube<eT>& meanSquaredGradient)
- {
- for (size_t s = 0; s < weights.n_slices; s++)
- Optimize(weights.slice(s), gradient.slice(s), meanSquaredGradient.slice(s));
- }
-
- /**
- * Optimize the given function using RmsProp.
- *
- * @param weights The weights that should be updated.
- * @param gradient The gradient used to update the weights.
- * @param meanSquaredGradient The moving average over the root mean squared
- * gradient used to update the weights.
- */
- template<typename eT>
- void Optimize(arma::Mat<eT>& weights,
- arma::Mat<eT>& gradient,
- arma::Mat<eT>& meanSquaredGradient)
- {
- meanSquaredGradient *= alpha;
- meanSquaredGradient += (1 - alpha) * (gradient % gradient);
- weights -= lr * gradient / (arma::sqrt(meanSquaredGradient) + eps);
- }
-
- //! The instantiated function.
- DecomposableFunctionType& function;
-
- //! The value used as learning rate.
- const double lr;
-
- //! The value used as alpha
- const double alpha;
-
- //! The value used as eps.
- const double eps;
-
- //! The current moving average of the squared gradients.
- DataType meanSquaredGad;
-
- //! The current gradient.
- DataType gradient;
-}; // class RMSPROP
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/optimizer/steepest_descent.hpp b/src/mlpack/methods/ann/optimizer/steepest_descent.hpp
deleted file mode 100644
index 3d5f927..0000000
--- a/src/mlpack/methods/ann/optimizer/steepest_descent.hpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * @file steepest_descent.hpp
- * @author Marcus Edel
- *
- * Implementation of the steepest descent optimizer. The method of steepest
- * descent, also called the gradient descent method, is used to find the
- * nearest local minimum of a function with the assumption that the gradient of
- * the function can be computed.
- */
-#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_STEEPEST_DESCENT_HPP
-#define __MLPACK_METHODS_ANN_OPTIMIZER_STEEPEST_DESCENT_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * This class is used to update the weights using steepest descent.
- *
- * @tparam DataType Type of input data (should be arma::mat,
- * arma::spmat or arma::cube).
- */
-template<typename DecomposableFunctionType, typename DataType>
-class SteepestDescent
-{
- public:
- /**
- * Construct the SteepestDescent optimizer with the given function and
- * parameters.
- *
- * @param function Function to be optimized (minimized).
- * @param lr The learning rate coefficient.
- * @param mom The momentum coefficient.
- */
- SteepestDescent(DecomposableFunctionType& function,
- const double lr = 0.5,
- const double mom = 0) :
- function(function),
- lr(lr),
- mom(mom)
-
- {
- // Nothing to do here.
- }
-
- /**
- * Optimize the given function using steepest descent.
- */
- void Optimize()
- {
- if (momWeights.n_elem == 0)
- {
- momWeights = function.Weights();
- momWeights.zeros();
- }
-
- Optimize(function.Weights(), gradient, momWeights);
- }
-
- /*
- * Sum up all gradients and store the results in the gradients storage.
- */
- void Update()
- {
- if (gradient.n_elem != 0)
- {
- DataType outputGradient = function.Gradient();
- gradient += outputGradient;
- }
- else
- {
- gradient = function.Gradient();
- }
- }
-
- /*
- * Reset the gradient storage.
- */
- void Reset()
- {
- gradient.zeros();
- }
-
- private:
- /** Optimize the given function using steepest descent.
- *
- * @param weights The weights that should be updated.
- * @param gradient The gradient used to update the weights.
- * @param momWeights The accumulated momentum term (past updates decayed by
- * mom) used to update the weights.
- */
- template<typename eT>
- void Optimize(arma::Cube<eT>& weights,
- arma::Cube<eT>& gradient,
- arma::Cube<eT>& momWeights)
- {
- for (size_t s = 0; s < weights.n_slices; s++)
- Optimize(weights.slice(s), gradient.slice(s), momWeights.slice(s));
- }
-
- /**
- * Optimize the given function using steepest descent.
- *
- * @param weights The weights that should be updated.
- * @param gradient The gradient used to update the weights.
- * @param momWeights The accumulated momentum term (past updates decayed by
- * mom) used to update the weights.
- */
- template<typename eT>
- void Optimize(arma::Mat<eT>& weights,
- arma::Mat<eT>& gradient,
- arma::Mat<eT>& momWeights)
- {
- if (mom > 0)
- {
- momWeights *= mom;
- momWeights += (lr * gradient);
- weights -= momWeights;
- }
- else
- {
- weights -= lr * gradient;
- }
- }
-
- //! The instantiated function.
- DecomposableFunctionType& function;
-
- //! The value used as learning rate.
- const double lr;
-
- //! The value used as momentum.
- const double mom;
-
- //! Momentum matrix.
- DataType momWeights;
-
- //! The current gradient.
- DataType gradient;
-}; // class SteepestDescent
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
More information about the mlpack-git
mailing list