[mlpack-git] master: Divide the square root of the second decay value by the first decay value; clarification of the beta parameters. (66cb285)
gitdub at mlpack.org
Fri Apr 1 12:17:57 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/6359987ecb0cbf762dc2b2167e574ae595a120d8...66cb285dc7d80068ac9ba6a5f40d239f1a6f672d
>---------------------------------------------------------------
commit 66cb285dc7d80068ac9ba6a5f40d239f1a6f672d
Author: marcus <marcus.edel at fu-berlin.de>
Date: Fri Apr 1 18:17:57 2016 +0200
Divide the square root of the second decay value by the first decay value; clarification of the beta parameters.
>---------------------------------------------------------------
66cb285dc7d80068ac9ba6a5f40d239f1a6f672d
src/mlpack/core/optimizers/adam/adam.hpp | 9 +++++----
src/mlpack/core/optimizers/adam/adam_impl.hpp | 6 +++---
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp
index 965ecfb..9894664 100644
--- a/src/mlpack/core/optimizers/adam/adam.hpp
+++ b/src/mlpack/core/optimizers/adam/adam.hpp
@@ -67,8 +67,9 @@ class Adam
*
* @param function Function to be optimized (minimized).
* @param stepSize Step size for each iteration.
- * @param beta1 The first moment coefficient.
- * @param beta2 The second moment coefficient.
+ * @param beta1 Exponential decay rate for the first moment estimates.
+ * @param beta2 Exponential decay rate for the weighted infinity norm
+ *        estimates.
* @param eps Value used to initialise the mean squared gradient parameter.
* @param maxIterations Maximum number of iterations allowed (0 means no
* limit).
@@ -142,10 +143,10 @@ class Adam
//! The step size for each example.
double stepSize;
- //! The value used as first moment coefficient.
+ //! Exponential decay rate for the first moment estimates.
double beta1;
- //! The value used as second moment coefficient.
+ //! Exponential decay rate for the weighted infinity norm estimates.
double beta2;
//! The value used to initialise the mean squared gradient parameter.
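For readers skimming the diff: beta1 and beta2 are the decay rates of the two exponential moving averages that Adam keeps, the running mean of the gradient and the running mean of the squared gradient. Below is a minimal standalone sketch of just those two updates, using Armadillo directly with a made-up gradient; it is an illustration, not mlpack's API.

#include <armadillo>

int main()
{
  const double beta1 = 0.9;   // Decay rate for the first moment (mean) estimate.
  const double beta2 = 0.999; // Decay rate for the second moment estimate.

  arma::mat gradient(3, 3, arma::fill::randu); // Stand-in gradient.
  arma::mat mean(3, 3, arma::fill::zeros);     // First moment estimate.
  arma::mat variance(3, 3, arma::fill::zeros); // Second moment estimate.

  // Each estimate is an exponential moving average; a decay rate closer
  // to 1 means a longer memory of past gradients.
  mean = beta1 * mean + (1 - beta1) * gradient;
  variance = beta2 * variance + (1 - beta2) * (gradient % gradient);

  return 0;
}

The same two updates appear, in compound-assignment form, in the adam_impl.hpp hunk below.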
diff --git a/src/mlpack/core/optimizers/adam/adam_impl.hpp b/src/mlpack/core/optimizers/adam/adam_impl.hpp
index bd1b953..878cddc 100644
--- a/src/mlpack/core/optimizers/adam/adam_impl.hpp
+++ b/src/mlpack/core/optimizers/adam/adam_impl.hpp
@@ -111,10 +111,10 @@ double Adam<DecomposableFunctionType>::Optimize(arma::mat& iterate)
variance *= beta2;
variance += (1 - beta2) * (gradient % gradient);
- double biasCorrection1 = 1.0 - std::pow(beta1, (double) i);
- double biasCorrection2 = 1.0 - std::pow(beta2, (double) i);
+ const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i);
+ const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i);
- iterate -= (stepSize * std::sqrt(biasCorrection1) / biasCorrection2) *
+ iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) *
mean / (arma::sqrt(variance) + eps);
// Now add that to the overall objective function.
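For reference, the swap matches the bias-corrected step size from the Adam paper (Kingma & Ba, 2015). Folding the two bias corrections into the step size \alpha gives, at iteration t,

  \alpha_t = \alpha \cdot \frac{\sqrt{1 - \beta_2^t}}{1 - \beta_1^t}

so the corrected update is

  \theta_t = \theta_{t-1} - \alpha_t \cdot \frac{m_t}{\sqrt{v_t} + \epsilon}

where m_t is the first moment estimate (mean) and v_t the second moment estimate (variance). The square root belongs to the beta2 correction and the plain denominator to the beta1 correction, which is exactly what the new line computes.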