[mlpack-git] master: Add a relative objective value termination criterion (460fe44)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 22:14:40 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 460fe44a33c3feba13419abfcbb89955fab46471
Author: Stephen Tu <tu.stephenl at gmail.com>
Date:   Thu Jan 1 19:45:37 2015 -0800

    Add a relative objective value termination criterion


>---------------------------------------------------------------

460fe44a33c3feba13419abfcbb89955fab46471
 src/mlpack/core/optimizers/lbfgs/lbfgs.hpp      | 12 +++++--
 src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp | 42 ++++++++++++++++++++++---
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp b/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
index 0cbae2d..18d3e8d 100644
--- a/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
+++ b/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
@@ -50,11 +50,12 @@ class L_BFGS
    * @param maxStep The maximum step of the line search.
    */
   L_BFGS(FunctionType& function,
-         const size_t numBasis = 5, /* entirely arbitrary */
+         const size_t numBasis = 10, /* same default as scipy */
          const size_t maxIterations = 0, /* run forever */
          const double armijoConstant = 1e-4,
          const double wolfe = 0.9,
-         const double minGradientNorm = 1e-10,
+         const double minGradientNorm = 1e-6,
+         const double factr = 1e-15,
          const size_t maxLineSearchTrials = 50,
          const double minStep = 1e-20,
          const double maxStep = 1e20);
@@ -124,6 +125,11 @@ class L_BFGS
   //! Modify the minimum gradient norm.
   double& MinGradientNorm() { return minGradientNorm; }
 
+  //! Get the factr value.
+  double Factr() const { return factr; }
+  //! Modify the factr value.
+  double& Factr() { return factr; }
+
   //! Get the maximum number of line search trials.
   size_t MaxLineSearchTrials() const { return maxLineSearchTrials; }
   //! Modify the maximum number of line search trials.
@@ -163,6 +169,8 @@ class L_BFGS
   double wolfe;
   //! Minimum gradient norm required to continue the optimization.
   double minGradientNorm;
+  //! Minimum relative function value decrease to continue the optimization.
+  double factr;
   //! Maximum number of trials for the line search.
   size_t maxLineSearchTrials;
   //! Minimum step of the line search.
diff --git a/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp b/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
index be4d473..3527d4d 100644
--- a/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
+++ b/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
@@ -34,6 +34,7 @@ L_BFGS<FunctionType>::L_BFGS(FunctionType& function,
                              const double armijoConstant,
                              const double wolfe,
                              const double minGradientNorm,
+                             const double factr,
                              const size_t maxLineSearchTrials,
                              const double minStep,
                              const double maxStep) :
@@ -43,6 +44,7 @@ L_BFGS<FunctionType>::L_BFGS(FunctionType& function,
     armijoConstant(armijoConstant),
     wolfe(wolfe),
     minGradientNorm(minGradientNorm),
+    factr(factr),
     maxLineSearchTrials(maxLineSearchTrials),
     minStep(minStep),
     maxStep(maxStep)
@@ -215,10 +217,18 @@ bool L_BFGS<FunctionType>::LineSearch(double& functionValue,
 
     // Terminate when the step size gets too small or too big or it
     // exceeds the max number of iterations.
-    if ((stepSize < minStep) || (stepSize > maxStep) ||
-        (numIterations >= maxLineSearchTrials))
+    const bool cond1 = (stepSize < minStep);
+    const bool cond2 = (stepSize > maxStep);
+    const bool cond3 = (numIterations >= maxLineSearchTrials);
+    if (cond1 || cond2 || cond3)
     {
-      return false;
+      if (cond1)
+        Log::Debug << "stepSize < minStep" << std::endl;
+      if (cond2)
+        Log::Debug << "stepSize > maxStep" << std::endl;
+      if (cond3)
+        Log::Debug << "numIterations >= maxLineSearchTrials (stepSize=" << stepSize << ")" << std::endl;
+      break;
     }
 
     // Scale the step size.
@@ -355,6 +365,7 @@ double L_BFGS<FunctionType>::Optimize(arma::mat& iterate,
 
   // The initial function value.
   double functionValue = Evaluate(iterate);
+  double prevFunctionValue = functionValue;
 
   // The gradient: the current and the old.
   arma::mat gradient;
@@ -374,10 +385,18 @@ double L_BFGS<FunctionType>::Optimize(arma::mat& iterate,
        ++itNum)
   {
     Log::Debug << "L-BFGS iteration " << itNum << "; objective " <<
-        function.Evaluate(iterate) << "." << std::endl;
+        function.Evaluate(iterate) << ", gradient norm " <<
+        arma::norm(gradient, 2) << ", " <<
+        ((prevFunctionValue - functionValue) /
+         std::max(std::max(fabs(prevFunctionValue), fabs(functionValue)), 1.0)) << "." << std::endl;
+
+    prevFunctionValue = functionValue;
 
     // Break when the norm of the gradient becomes too small.
-    if (GradientNormTooSmall(gradient))
+    //
+    // But don't do this on the first iteration to ensure we always take at
+    // least one descent step.
+    if (itNum > 0 && GradientNormTooSmall(gradient))
     {
       Log::Debug << "L-BFGS gradient norm too small (terminating successfully)."
           << std::endl;
@@ -411,6 +430,19 @@ double L_BFGS<FunctionType>::Optimize(arma::mat& iterate,
       break;
     }
 
+    // If we can't make progress on the gradient, then we'll also accept
+    // a stable function value
+    const double denom =
+      std::max(
+        std::max(fabs(prevFunctionValue), fabs(functionValue)),
+        1.0);
+    if ((prevFunctionValue - functionValue) / denom <= factr)
+    {
+      Log::Debug << "L-BFGS function value stable (terminating successfully)."
+          << std::endl;
+      break;
+    }
+
     // Overwrite an old basis set.
     UpdateBasisSet(itNum, iterate, oldIterate, gradient, oldGradient);
 



More information about the mlpack-git mailing list