[mlpack-git] master: Implemented gradient descent optimizer. (6de69d7)

gitdub at mlpack.org
Tue Sep 27 18:18:19 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/998bb2fae41210d03ddf007b51d994a9cf6262cf...5fd539549c181e602e48c18fd9c024382c42d676

>---------------------------------------------------------------

commit 6de69d728949b5f74579f4ffce72297138338705
Author: sumedhghaisas <sumedhghaisas at gmail.com>
Date:   Wed Sep 28 03:48:19 2016 +0530

    Implemented gradient descent optimizer.


>---------------------------------------------------------------

6de69d728949b5f74579f4ffce72297138338705
 src/mlpack/core/optimizers/CMakeLists.txt          |   1 +
 .../{sgd => gradient_descent}/CMakeLists.txt       |   4 +-
 .../gradient_descent/gradient_descent.hpp          | 118 +++++++++++++++++++++
 .../gradient_descent/gradient_descent_impl.hpp     |  80 ++++++++++++++
 .../optimizers/gradient_descent/test_function.cpp  |  23 ++++
 .../{sgd => gradient_descent}/test_function.hpp    |  21 ++--
 src/mlpack/tests/CMakeLists.txt                    |   1 +
 src/mlpack/tests/gradient_descent_test.cpp         |  52 +++++++++
 8 files changed, 285 insertions(+), 15 deletions(-)

diff --git a/src/mlpack/core/optimizers/CMakeLists.txt b/src/mlpack/core/optimizers/CMakeLists.txt
index c5163da..54debc5 100644
--- a/src/mlpack/core/optimizers/CMakeLists.txt
+++ b/src/mlpack/core/optimizers/CMakeLists.txt
@@ -2,6 +2,7 @@ set(DIRS
   adadelta
   adam
   aug_lagrangian
+  gradient_descent
   lbfgs
   minibatch_sgd
   rmsprop
diff --git a/src/mlpack/core/optimizers/sgd/CMakeLists.txt b/src/mlpack/core/optimizers/gradient_descent/CMakeLists.txt
similarity index 81%
copy from src/mlpack/core/optimizers/sgd/CMakeLists.txt
copy to src/mlpack/core/optimizers/gradient_descent/CMakeLists.txt
index 16c730a..808088f 100644
--- a/src/mlpack/core/optimizers/sgd/CMakeLists.txt
+++ b/src/mlpack/core/optimizers/gradient_descent/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(SOURCES
-  sgd.hpp
-  sgd_impl.hpp
+  gradient_descent.hpp
+  gradient_descent_impl.hpp
   test_function.hpp
   test_function.cpp
 )
diff --git a/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp b/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp
new file mode 100644
index 0000000..cf33b5b
--- /dev/null
+++ b/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp
@@ -0,0 +1,118 @@
+/**
+ * @file gradient_descent.hpp
+ * @author Sumedh Ghaisas
+ *
+ * Simple Gradient Descent.
+ */
+#ifndef MLPACK_CORE_OPTIMIZERS_GRADIENT_DESCENT_GRADIENT_DESCENT_HPP
+#define MLPACK_CORE_OPTIMIZERS_GRADIENT_DESCENT_GRADIENT_DESCENT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace optimization {
+
+/**
+ * Gradient Descent is a technique to minimize a function. To find a local 
+ * minimum of a function using gradient descent, one takes steps proportional 
+ * to the negative of the gradient of the function at the current point, 
+ * producing the following update scheme:
+ *
+ * \f[
+ * A_{j + 1} = A_j - \alpha \nabla F(A_j)
+ * \f]
+ *
+ * where \f$ \alpha \f$ is a parameter specifying the step size and \f$ F \f$
+ * is the function being optimized. The algorithm continues until \f$ j \f$
+ * reaches the maximum number of iterations, or until an update produces an
+ * improvement smaller than a certain tolerance \f$ \epsilon \f$.  That is,
+ *
+ * \f[
+ * | F(A_{j + 1}) - F(A_j) | < \epsilon.
+ * \f]
+ *
+ * The parameter \f$\epsilon\f$ is specified by the tolerance parameter to the
+ * constructor.
+ *
+ * For Gradient Descent to work, a FunctionType template parameter is required.
+ * This class must implement the following functions:
+ *
+ *   double Evaluate(const arma::mat& coordinates);
+ *   void Gradient(const arma::mat& coordinates,
+ *                 arma::mat& gradient);
+ *
+ * @tparam FunctionType Objective function type to be minimized; it must
+ *     implement the two functions listed above.
+ */
+template<typename FunctionType>
+class GradientDescent
+{
+ public:
+  /**
+   * Construct the Gradient Descent optimizer with the given function and 
+   * parameters.  The defaults here are not necessarily good for the given 
+   * problem, so it is suggested that the values used be tailored to the task 
+   * at hand.
+   *
+   * @param function Function to be optimized (minimized).
+   * @param stepSize Step size for each iteration.
+   * @param maxIterations Maximum number of iterations allowed (0 means no
+   *     limit).
+   * @param tolerance Maximum absolute tolerance to terminate the algorithm.
+   */
+  GradientDescent(FunctionType& function,
+      const double stepSize = 0.01,
+      const size_t maxIterations = 100000,
+      const double tolerance = 1e-5);
+
+  /**
+   * Optimize the given function using gradient descent.  The given starting 
+   * point will be modified to store the finishing point of the algorithm, and 
+   * the final objective value is returned.
+   *
+   * @param iterate Starting point (will be modified).
+   * @return Objective value of the final point.
+   */
+  double Optimize(arma::mat& iterate);
+
+  //! Get the instantiated function to be optimized.
+  const FunctionType& Function() const { return function; }
+  //! Modify the instantiated function.
+  FunctionType& Function() { return function; }
+
+  //! Get the step size.
+  double StepSize() const { return stepSize; }
+  //! Modify the step size.
+  double& StepSize() { return stepSize; }
+
+  //! Get the maximum number of iterations (0 indicates no limit).
+  size_t MaxIterations() const { return maxIterations; }
+  //! Modify the maximum number of iterations (0 indicates no limit).
+  size_t& MaxIterations() { return maxIterations; }
+
+  //! Get the tolerance for termination.
+  double Tolerance() const { return tolerance; }
+  //! Modify the tolerance for termination.
+  double& Tolerance() { return tolerance; }
+
+ private:
+  //! The instantiated function.
+  FunctionType& function;
+
+  //! The step size for each iteration.
+  double stepSize;
+
+  //! The maximum number of allowed iterations.
+  size_t maxIterations;
+
+  //! The tolerance for termination.
+  double tolerance;
+};
+
+} // namespace optimization
+} // namespace mlpack
+
+// Include implementation.
+#include "gradient_descent_impl.hpp"
+
+#endif
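
As a quick illustration of the API declared above (this snippet is not part of
the commit; the QuadraticFunction name and the main() scaffolding are only
illustrative), a caller supplies a class with the two required methods and
hands it to the optimizer:

    #include <mlpack/core.hpp>
    #include <mlpack/core/optimizers/gradient_descent/gradient_descent.hpp>

    using namespace mlpack::optimization;

    // A FunctionType providing the two methods required above; f(A) = A^T A.
    class QuadraticFunction
    {
     public:
      double Evaluate(const arma::mat& coordinates) const
      {
        return arma::accu(arma::square(coordinates));
      }

      void Gradient(const arma::mat& coordinates, arma::mat& gradient) const
      {
        gradient = 2 * coordinates;
      }
    };

    int main()
    {
      QuadraticFunction f;

      // Step size 0.05, at most 10000 iterations, tolerance 1e-10.
      GradientDescent<QuadraticFunction> opt(f, 0.05, 10000, 1e-10);

      // The starting point is overwritten with the final point.
      arma::mat coordinates("1; 3; 2");
      const double objective = opt.Optimize(coordinates);

      // objective is now near 0 and coordinates is near the origin.
      return 0;
    }
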
diff --git a/src/mlpack/core/optimizers/gradient_descent/gradient_descent_impl.hpp b/src/mlpack/core/optimizers/gradient_descent/gradient_descent_impl.hpp
new file mode 100644
index 0000000..9f17b09
--- /dev/null
+++ b/src/mlpack/core/optimizers/gradient_descent/gradient_descent_impl.hpp
@@ -0,0 +1,80 @@
+/**
+ * @file gradient_descent_impl.hpp
+ * @author Sumedh Ghaisas
+ *
+ * Simple gradient descent implementation.
+ */
+#ifndef MLPACK_CORE_OPTIMIZERS_GRADIENT_DESCENT_GRADIENT_DESCENT_IMPL_HPP
+#define MLPACK_CORE_OPTIMIZERS_GRADIENT_DESCENT_GRADIENT_DESCENT_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "gradient_descent.hpp"
+
+namespace mlpack {
+namespace optimization {
+
+template<typename FunctionType>
+GradientDescent<FunctionType>::GradientDescent(
+    FunctionType& function,
+    const double stepSize,
+    const size_t maxIterations,
+    const double tolerance) :
+    function(function),
+    stepSize(stepSize),
+    maxIterations(maxIterations),
+    tolerance(tolerance)
+{ /* Nothing to do. */ }
+
+//! Optimize the function (minimize).
+template<typename FunctionType>
+double GradientDescent<FunctionType>::Optimize(
+    arma::mat& iterate)
+{
+  // To keep track of where we are and how things are going.
+  double overallObjective = function.Evaluate(iterate);
+  double lastObjective = DBL_MAX;
+
+  // Now iterate!
+  arma::mat gradient(iterate.n_rows, iterate.n_cols);
+  for (size_t i = 1; i != maxIterations; ++i)
+  {
+    // Output current objective function.
+    Log::Info << "Gradient Descent: iteration " << i << ", objective " 
+      << overallObjective << "." << std::endl;
+
+    if (std::isnan(overallObjective) || std::isinf(overallObjective))
+    {
+      Log::Warn << "Gradient Descent: converged to " << overallObjective 
+        << "; terminating" << " with failure.  Try a smaller step size?" 
+        << std::endl;
+      return overallObjective;
+    }
+
+    if (std::abs(lastObjective - overallObjective) < tolerance)
+    {
+      Log::Info << "Gradient Descent: minimized within tolerance " 
+        << tolerance << "; " << "terminating optimization." << std::endl;
+      return overallObjective;
+    }
+
+    // Save this objective for the next iteration's tolerance check.
+    lastObjective = overallObjective;
+
+    function.Gradient(iterate, gradient);
+
+    // And update the iterate.
+    iterate -= stepSize * gradient;
+
+    // Recompute the objective at the updated point.
+    overallObjective = function.Evaluate(iterate);
+  }
+
+  Log::Info << "Gradient Descent: maximum iterations (" << maxIterations 
+    << ") reached; " << "terminating optimization." << std::endl;
+  return overallObjective;
+}
+
+} // namespace optimization
+} // namespace mlpack
+
+#endif
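
To make the termination behavior of the loop above concrete (a worked example,
not part of the commit): on the quadratic test function f(A) = A^T A added
below, whose gradient is 2A, each iteration with the default step size
alpha = 0.01 computes

    A_{j+1} = A_j - \alpha \nabla F(A_j) = A_j - 0.01 \cdot 2 A_j = 0.98 A_j,

so the iterate contracts geometrically toward the minimum at the origin and the
objective shrinks by a factor of 0.98^2 = 0.9604 per step, until either
|F(A_{j+1}) - F(A_j)| falls below the tolerance or maxIterations is reached.
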
diff --git a/src/mlpack/core/optimizers/gradient_descent/test_function.cpp b/src/mlpack/core/optimizers/gradient_descent/test_function.cpp
new file mode 100644
index 0000000..bf137fe
--- /dev/null
+++ b/src/mlpack/core/optimizers/gradient_descent/test_function.cpp
@@ -0,0 +1,23 @@
+/**
+ * @file test_function.cpp
+ * @author Sumedh Ghaisas
+ *
+ * Implementation of very simple test function for gradient descent.
+ */
+#include "test_function.hpp"
+
+using namespace mlpack;
+using namespace mlpack::optimization;
+using namespace mlpack::optimization::test;
+
+double GDTestFunction::Evaluate(const arma::mat& coordinates) const
+{
+  arma::vec temp = arma::trans(coordinates) * coordinates;
+  return temp(0, 0);
+}
+
+void GDTestFunction::Gradient(const arma::mat& coordinates,
+                              arma::mat& gradient) const
+{
+  gradient = 2 * coordinates;
+}
diff --git a/src/mlpack/core/optimizers/sgd/test_function.hpp b/src/mlpack/core/optimizers/gradient_descent/test_function.hpp
similarity index 55%
copy from src/mlpack/core/optimizers/sgd/test_function.hpp
copy to src/mlpack/core/optimizers/gradient_descent/test_function.hpp
index 7b059e1..db28171 100644
--- a/src/mlpack/core/optimizers/sgd/test_function.hpp
+++ b/src/mlpack/core/optimizers/gradient_descent/test_function.hpp
@@ -1,11 +1,11 @@
 /**
  * @file test_function.hpp
- * @author Ryan Curtin
+ * @author Sumedh Ghaisas
  *
  * Very simple test function for SGD.
  */
-#ifndef MLPACK_CORE_OPTIMIZERS_SGD_TEST_FUNCTION_HPP
-#define MLPACK_CORE_OPTIMIZERS_SGD_TEST_FUNCTION_HPP
+#ifndef MLPACK_CORE_OPTIMIZERS_GD_TEST_FUNCTION_HPP
+#define MLPACK_CORE_OPTIMIZERS_GD_TEST_FUNCTION_HPP
 
 #include <mlpack/core.hpp>
 
@@ -17,25 +17,20 @@ namespace test {
 //! functions.  The gradient is not very steep far away from the optimum, so a
 //! larger step size may be required to optimize it in a reasonable number of
 //! iterations.
-class SGDTestFunction
+class GDTestFunction
 {
  public:
   //! Nothing to do for the constructor.
-  SGDTestFunction() { }
-
-  //! Return 3 (the number of functions).
-  size_t NumFunctions() const { return 3; }
+  GDTestFunction() { }
 
   //! Get the starting point.
-  arma::mat GetInitialPoint() const { return arma::mat("6; -45.6; 6.2"); }
+  arma::mat GetInitialPoint() const { return arma::mat("1; 3; 2"); }
 
   //! Evaluate a function.
-  double Evaluate(const arma::mat& coordinates, const size_t i) const;
+  double Evaluate(const arma::mat& coordinates) const;
 
   //! Evaluate the gradient of a function.
-  void Gradient(const arma::mat& coordinates,
-                const size_t i,
-                arma::mat& gradient) const;
+  void Gradient(const arma::mat& coordinates, arma::mat& gradient) const;
 };
 
 } // namespace test
diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt
index 9ad4092..4906d22 100644
--- a/src/mlpack/tests/CMakeLists.txt
+++ b/src/mlpack/tests/CMakeLists.txt
@@ -22,6 +22,7 @@ add_executable(mlpack_test
   fastmks_test.cpp
   feedforward_network_test.cpp
   gmm_test.cpp
+  gradient_descent_test.cpp
   hmm_test.cpp
   hoeffding_tree_test.cpp
   hyperplane_test.cpp
diff --git a/src/mlpack/tests/gradient_descent_test.cpp b/src/mlpack/tests/gradient_descent_test.cpp
new file mode 100644
index 0000000..0eb9a24
--- /dev/null
+++ b/src/mlpack/tests/gradient_descent_test.cpp
@@ -0,0 +1,52 @@
+/**
+ * @file gradient_descent_test.cpp
+ * @author Sumedh Ghaisas
+ *
+ * Test file for Gradient Descent optimizer.
+ */
+#include <mlpack/core.hpp>
+#include <mlpack/core/optimizers/gradient_descent/gradient_descent.hpp>
+#include <mlpack/core/optimizers/lbfgs/test_functions.hpp>
+#include <mlpack/core/optimizers/gradient_descent/test_function.hpp>
+
+#include <boost/test/unit_test.hpp>
+#include "test_tools.hpp"
+
+using namespace std;
+using namespace arma;
+using namespace mlpack;
+using namespace mlpack::optimization;
+using namespace mlpack::optimization::test;
+
+BOOST_AUTO_TEST_SUITE(GradientDescentTest);
+
+BOOST_AUTO_TEST_CASE(SimpleGDTestFunction)
+{
+  GDTestFunction f;
+  GradientDescent<GDTestFunction> s(f, 0.01, 5000000, 1e-9);
+
+  arma::vec coordinates = f.GetInitialPoint();
+  double result = s.Optimize(coordinates);
+
+  BOOST_REQUIRE_SMALL(result, 1e-4);
+  BOOST_REQUIRE_SMALL(coordinates[0], 1e-2);
+  BOOST_REQUIRE_SMALL(coordinates[1], 1e-2);
+  BOOST_REQUIRE_SMALL(coordinates[2], 1e-2);
+}
+
+BOOST_AUTO_TEST_CASE(RosenbrockTest)
+{
+  // Create the Rosenbrock function.
+  RosenbrockFunction f;
+
+  GradientDescent<RosenbrockFunction> s(f, 0.001, 0, 1e-15);
+
+  arma::mat coordinates = f.GetInitialPoint();
+  double result = s.Optimize(coordinates);
+
+  BOOST_REQUIRE_SMALL(result, 1e-10);
+  for (size_t j = 0; j < 2; ++j)
+    BOOST_REQUIRE_CLOSE(coordinates[j], (double) 1.0, 1e-3);
+}
+
+BOOST_AUTO_TEST_SUITE_END();
