[mlpack-git] master: Minor documentation updates and formatting changes. (90f9f8f)
gitdub at big.cc.gt.atl.ga.us
Wed Oct 14 10:31:22 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ce49a4b5f0b7d12d4955c09e45c69891a6f83e8a...90f9f8f270c743ecc38542bb5b04aae83608bc9c
>---------------------------------------------------------------
commit 90f9f8f270c743ecc38542bb5b04aae83608bc9c
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Wed Oct 14 16:31:16 2015 +0200
Minor documentation updates and formatting changes.
>---------------------------------------------------------------
90f9f8f270c743ecc38542bb5b04aae83608bc9c
src/mlpack/methods/ann/optimizer/ada_delta.hpp | 40 ++++++++++------------
src/mlpack/methods/ann/optimizer/rmsprop.hpp | 5 +--
.../methods/ann/optimizer/steepest_descent.hpp | 2 +-
3 files changed, 22 insertions(+), 25 deletions(-)
diff --git a/src/mlpack/methods/ann/optimizer/ada_delta.hpp b/src/mlpack/methods/ann/optimizer/ada_delta.hpp
index 0e74b6a..63a053d 100644
--- a/src/mlpack/methods/ann/optimizer/ada_delta.hpp
+++ b/src/mlpack/methods/ann/optimizer/ada_delta.hpp
@@ -2,8 +2,9 @@
* @file ada_delta.hpp
* @author Marcus Edel
*
- * Implmentation of the RmsProp optimizer. Adadelta is an optimizer that uses
- * the magnitude of recent gradients and steps to obtain an adaptive step rate.
+ * Implementation of the Adadelta optimizer. Adadelta is an optimizer that
+ * dynamically adapts over time using only first order information.
+ * Additionally, Adadelta requires no manual tuning of a learning rate.
*/
#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_ADA_DELTA_HPP
#define __MLPACK_METHODS_ANN_OPTIMIZER_ADA_DELTA_HPP
@@ -14,18 +15,11 @@ namespace mlpack {
namespace ann /** Artificial Neural Network. */ {
/**
- * Adadelta is an optimizer that uses the magnitude of recent gradients and
- * steps to obtain an adaptive step rate. In its basic form, given a step rate
- * \f$ \gamma \f$ and a decay term \f$ \alpha \f$ we perform the following
- * updates:
+ * Adadelta is an optimizer that uses two ideas to improve upon the two main
+ * drawbacks of the Adagrad method:
*
- * \f{eqnarray*}{
- * g_t &=& (1 - \gamma)f'(\Delta_t)^2 + \gamma g_{t - 1} \\
- * \vec{\Delta} \Delta_t &=& \alpha \frac{\sqrt(s_{t-1} +
- * \epsilon)}{\sqrt{g_t + \epsilon}} f'(\Delta_t) \\
- * \Delta_{t + 1} &=& \Delta_t - \vec{\Delta} \Delta_t \\
- * s_t &=& (1 - \gamma) \vec{\Delta} \Delta_t^2 + \gamma s_{t - 1}
- * \f}
+ * - Accumulate Over Window
+ * - Correct Units with Hessian Approximation
*
* For more information, see the following.
*
@@ -46,8 +40,10 @@ class AdaDelta
* Construct the AdaDelta optimizer with the given function and parameters.
*
* @param function Function to be optimized (minimized).
- * @param rho Constant similar to that used in AdaDelta and Momentum methods.
- * @param eps The eps coefficient to avoid division by zero.
+ * @param rho Constant interpolation parameter similar to that used in
+ * Momentum methods.
+ * @param eps The eps coefficient to avoid division by zero (numerical
+ * stability).
*/
AdaDelta(DecomposableFunctionType& function,
const double rho = 0.95,
@@ -60,7 +56,7 @@ class AdaDelta
}
/**
- * Optimize the given function using RmsProp.
+ * Optimize the given function using AdaDelta.
*/
void Optimize()
{
@@ -110,8 +106,8 @@ class AdaDelta
*
* @param weights The weights that should be updated.
* @param gradient The gradient used to update the weights.
- * @param meanSquaredGradient The current mean squared gradient Dx
- * @param meanSquaredGradientDx The current mean squared gradient.
+ * @param meanSquaredGradient The current mean squared gradient.
+ * @param meanSquaredGradientDx The current mean squared Dx gradient.
*/
template<typename eT>
void Optimize(arma::Cube<eT>& weights,
@@ -131,8 +127,8 @@ class AdaDelta
*
* @param weights The weights that should be updated.
* @param gradient The gradient used to update the weights.
- * @param meanSquaredGradient The current mean squared gradient Dx
- * @param meanSquaredGradientDx The current mean squared gradient.
+ * @param meanSquaredGradient The current mean squared gradient.
+ * @param meanSquaredGradientDx The current mean squared Dx gradient.
*/
template<typename eT>
void Optimize(arma::Mat<eT>& weights,
@@ -157,7 +153,7 @@ class AdaDelta
//! The instantiated function.
DecomposableFunctionType& function;
- //! The value used as learning rate.
+ //! The value used as interpolation parameter.
const double rho;
//! The value used as eps.
@@ -169,7 +165,7 @@ class AdaDelta
//! The current mean squared gradient.
DataType meanSquaredGradient;
- //! The current mean squared gradient Dx
+ //! The current mean squared Dx gradient.
DataType meanSquaredGradientDx;
}; // class AdaDelta
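
For reference, the update rule removed from the comment block above can be written as a small standalone routine. The following is only an illustrative sketch using Armadillo (as mlpack does); the function name AdaDeltaStep and its exact signature are assumptions rather than the mlpack API, but the arithmetic follows the accumulate-over-window idea with the rho and eps parameters documented in the constructor:

    #include <armadillo>

    // Illustrative AdaDelta step (not the mlpack source). Each call
    // accumulates a decaying average of squared gradients and squared
    // updates, then rescales the gradient so no learning rate is needed.
    template<typename eT>
    void AdaDeltaStep(arma::Mat<eT>& weights,
                      const arma::Mat<eT>& gradient,
                      arma::Mat<eT>& meanSquaredGradient,
                      arma::Mat<eT>& meanSquaredGradientDx,
                      const double rho,
                      const double eps)
    {
      // Accumulate the squared gradient over a decaying window.
      meanSquaredGradient *= rho;
      meanSquaredGradient += (1 - rho) * (gradient % gradient);

      // Rescale the gradient so the step has the same units as the weights.
      arma::Mat<eT> dx = arma::sqrt((meanSquaredGradientDx + eps) /
                                    (meanSquaredGradient + eps)) % gradient;

      // Accumulate the squared update, then apply it.
      meanSquaredGradientDx *= rho;
      meanSquaredGradientDx += (1 - rho) * (dx % dx);
      weights -= dx;
    }
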
diff --git a/src/mlpack/methods/ann/optimizer/rmsprop.hpp b/src/mlpack/methods/ann/optimizer/rmsprop.hpp
index 063236d..277b74a 100644
--- a/src/mlpack/methods/ann/optimizer/rmsprop.hpp
+++ b/src/mlpack/methods/ann/optimizer/rmsprop.hpp
@@ -2,7 +2,7 @@
* @file rmsprop.hpp
* @author Marcus Edel
*
- * Implmentation of the RmsProp optimizer. RmsProp is an optimizer that utilizes
+ * Implementation of the RmsProp optimizer. RmsProp is an optimizer that utilizes
* the magnitude of recent gradients to normalize the gradients.
*/
#ifndef __MLPACK_METHODS_ANN_OPTIMIZER_RMSPROP_HPP
@@ -44,7 +44,8 @@ class RMSPROP
* @param function Function to be optimized (minimized).
* @param lr The learning rate coefficient.
* @param alpha Constant similar to that used in AdaDelta and Momentum methods.
- * @param eps The eps coefficient to avoid division by zero.
+ * @param eps The eps coefficient to avoid division by zero (numerical
+ * stability).
*/
RMSPROP(DecomposableFunctionType& function,
const double lr = 0.01,
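
As with AdaDelta above, the RmsProp update can be summarized in a few lines. This is only an illustrative sketch using Armadillo; the name RMSPropStep and its signature are assumptions, not the mlpack API, but it shows how the lr, alpha, and eps parameters documented in the constructor interact:

    #include <armadillo>

    // Illustrative RmsProp step (not the mlpack source): the gradient is
    // normalized by the root of a decaying average of its squared magnitude.
    template<typename eT>
    void RMSPropStep(arma::Mat<eT>& weights,
                     const arma::Mat<eT>& gradient,
                     arma::Mat<eT>& meanSquaredGradient,
                     const double lr,
                     const double alpha,
                     const double eps)
    {
      // Decaying average of the squared gradient.
      meanSquaredGradient *= alpha;
      meanSquaredGradient += (1 - alpha) * (gradient % gradient);

      // Scale the step by the learning rate and the normalizing term.
      weights -= lr * gradient / (arma::sqrt(meanSquaredGradient) + eps);
    }
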
diff --git a/src/mlpack/methods/ann/optimizer/steepest_descent.hpp b/src/mlpack/methods/ann/optimizer/steepest_descent.hpp
index b2b18d1..5639979 100644
--- a/src/mlpack/methods/ann/optimizer/steepest_descent.hpp
+++ b/src/mlpack/methods/ann/optimizer/steepest_descent.hpp
@@ -2,7 +2,7 @@
* @file steepest_descent.hpp
* @author Marcus Edel
*
- * Implmentation of the steepest descent optimizer. The method of steepest
+ * Implementation of the steepest descent optimizer. The method of steepest
* descent, also called the gradient descent method, is used to find the
 * nearest local minimum of a function with the assumption that the gradient of
* the function can be computed.
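
The steepest descent update itself is the simplest of the three optimizers touched by this commit. A minimal illustrative sketch (again not the mlpack source; SteepestDescentStep is a hypothetical name) is:

    #include <armadillo>

    // Illustrative steepest descent step: move against the gradient,
    // scaled by a fixed learning rate lr.
    template<typename eT>
    void SteepestDescentStep(arma::Mat<eT>& weights,
                             const arma::Mat<eT>& gradient,
                             const double lr)
    {
      weights -= lr * gradient;
    }
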