[mlpack-git] master: Add support for MiniBatchSGD. (4f4bd20)

gitdub at mlpack.org
Mon Feb 22 13:07:48 EST 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/0341d4d82c030dd7bcf91f5dfe7b9e452b7b3cdc...4f4bd200d2617a5448c3377a732d7aaa8206e92a

>---------------------------------------------------------------

commit 4f4bd200d2617a5448c3377a732d7aaa8206e92a
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Feb 22 10:07:48 2016 -0800

    Add support for MiniBatchSGD.


>---------------------------------------------------------------

4f4bd200d2617a5448c3377a732d7aaa8206e92a
 .../logistic_regression_main.cpp                   | 58 ++++++++++++++++------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp b/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
index 8405253..4f4de07 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
@@ -8,6 +8,7 @@
 #include "logistic_regression.hpp"
 
 #include <mlpack/core/optimizers/sgd/sgd.hpp>
+#include <mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp>
 
 using namespace std;
 using namespace mlpack;
@@ -38,16 +39,23 @@ PROGRAM_INFO("L2-regularized Logistic Regression and Prediction",
     "When a model is being trained, there are many options.  L2 regularization "
     "(to prevent overfitting) can be specified with the -l option, and the "
     "optimizer used to train the model can be specified with the --optimizer "
-    "option.  Available options are 'sgd' (stochastic gradient descent) and "
-    "'lbfgs' (the L-BFGS optimizer).  There are also various parameters for the"
-    " optimizer; the --max_iterations parameter specifies the maximum number of"
-    " allowed iterations, and the --tolerance (-e) parameter specifies the "
-    "tolerance for convergence.  For the SGD optimizer, the --step_size "
-    "parameter controls the step size taken at each iteration by the optimizer."
-    "  If the objective function for your data is oscillating between Inf and "
-    "0, the step size is probably too large.  There are more parameters for the"
-    " SGD and L-BFGS optimizers, but the C++ interface must be used to access "
-    "these."
+    "option.  Available options are 'sgd' (stochastic gradient descent), "
+    "'lbfgs' (the L-BFGS optimizer), and 'minibatch-sgd' (minibatch stochastic "
+    "gradient descent).  There are also various parameters for the optimizer; "
+    "the --max_iterations parameter specifies the maximum number of allowed "
+     "iterations, and the --tolerance (-e) parameter specifies the tolerance "
+    "for convergence.  For the SGD and mini-batch SGD optimizers, the "
+    "--step_size parameter controls the step size taken at each iteration by "
+    "the optimizer.  The batch size for mini-batch SGD is controlled with the "
+    "--batch_size (-b) parameter.  If the objective function for your data is "
+    "oscillating between Inf and 0, the step size is probably too large.  There"
+    " are more parameters for the optimizers, but the C++ interface must be "
+    "used to access these."
+    "\n\n"
+    "For SGD, an iteration refers to a single point, and for mini-batch SGD, an"
+    " iteration refers to a single batch.  So to take a single pass over the "
+    "dataset with SGD, --max_iterations should be set to the number of points "
+    "in the dataset."
     "\n\n"
     "Optionally, the model can be used to predict the responses for another "
     "matrix of data points, if --test_file is specified.  The --test_file "
@@ -73,7 +81,9 @@ PARAM_STRING("optimizer", "Optimizer to use for training ('lbfgs' or 'sgd').",
 PARAM_DOUBLE("tolerance", "Convergence tolerance for optimizer.", "e", 1e-10);
 PARAM_INT("max_iterations", "Maximum iterations for optimizer (0 indicates no "
     "limit).", "n", 10000);
-PARAM_DOUBLE("step_size", "Step size for SGD optimizer.", "s", 0.01);
+PARAM_DOUBLE("step_size", "Step size for SGD and mini-batch SGD optimizers.",
+    "s", 0.01);
+PARAM_INT("batch_size", "Batch size for mini-batch SGD.", "b", 50);
 
 // Model loading/saving.
 PARAM_STRING("input_model_file", "File containing existing model (parameters).",
@@ -100,6 +110,7 @@ int main(int argc, char** argv)
   const string optimizerType = CLI::GetParam<string>("optimizer");
   const double tolerance = CLI::GetParam<double>("tolerance");
   const double stepSize = CLI::GetParam<double>("step_size");
+  const size_t batchSize = (size_t) CLI::GetParam<int>("batch_size");
   const size_t maxIterations = (size_t) CLI::GetParam<int>("max_iterations");
   const string inputModelFile = CLI::GetParam<string>("input_model_file");
   const string outputModelFile = CLI::GetParam<string>("output_model_file");
@@ -124,8 +135,10 @@ int main(int argc, char** argv)
         << endl;
 
   // Optimizer has to be L-BFGS or SGD.
-  if (optimizerType != "lbfgs" && optimizerType != "sgd")
-    Log::Fatal << "--optimizer must be 'lbfgs' or 'sgd'." << endl;
+  if (optimizerType != "lbfgs" && optimizerType != "sgd" &&
+      optimizerType != "minibatch-sgd")
+    Log::Fatal << "--optimizer must be 'lbfgs', 'sgd', or 'minibatch-sgd'."
+        << endl;
 
   // Lambda must be positive.
   if (lambda < 0.0)
@@ -137,7 +150,8 @@ int main(int argc, char** argv)
     Log::Fatal << "Decision boundary (--decision_boundary) must be between 0.0 "
         << "and 1.0 (received " << decisionBoundary << ")." << endl;
 
-  if ((stepSize < 0.0) && (optimizerType == "sgd"))
+  if ((stepSize < 0.0) &&
+      (optimizerType == "sgd" || optimizerType == "minibatch-sgd"))
     Log::Fatal << "Step size (--step_size) must be positive (received "
         << stepSize << ")." << endl;
 
@@ -145,6 +159,10 @@ int main(int argc, char** argv)
     Log::Warn << "Step size (--step_size) ignored because 'sgd' optimizer is "
         << "not being used." << endl;
 
+  if (CLI::HasParam("batch_size") && optimizerType != "minibatch-sgd")
+    Log::Warn << "Batch size (--batch_size) ignored because 'minibatch-sgd' "
+        << "optimizer is not being used." << endl;
+
   // These are the matrices we might use.
   arma::mat regressors;
   arma::Mat<size_t> responsesMat;
@@ -222,6 +240,18 @@ int main(int argc, char** argv)
       // This will train the model.
       model.Train(lbfgsOpt);
     }
+    else if (optimizerType == "minibatch-sgd")
+    {
+      MiniBatchSGD<LogisticRegressionFunction<>> mbsgdOpt(lrf);
+      mbsgdOpt.BatchSize() = batchSize;
+      mbsgdOpt.Tolerance() = tolerance;
+      mbsgdOpt.StepSize() = stepSize;
+      mbsgdOpt.MaxIterations() = maxIterations;
+      Log::Info << "Training model with mini-batch SGD optimizer (batch size "
+          << batchSize << ")." << endl;
+
+      model.Train(mbsgdOpt);
+    }
   }
 
   if (!testFile.empty())
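
As a usage sketch of the new option from the command line: the binary name, the input flags (--training_file, --labels_file), and the file names below are assumptions following mlpack's conventions, not part of this commit; --optimizer, --batch_size, --step_size, --max_iterations, and --output_model_file are the parameters handled in the diff above.

    mlpack_logistic_regression --training_file data.csv --labels_file labels.csv \
        --optimizer minibatch-sgd --batch_size 50 --step_size 0.01 \
        --max_iterations 10000 --output_model_file model.xml

Because an iteration is a single batch for mini-batch SGD, one pass over a dataset of 10000 points at --batch_size 50 corresponds to --max_iterations 200 (10000 / 50), whereas plain SGD would need --max_iterations 10000 for the same pass.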
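
For reference, a minimal standalone C++ sketch of the training path the new 'minibatch-sgd' branch takes. The MiniBatchSGD setup mirrors the diff above; the synthetic data, the arma::Row<size_t> label type, and the LogisticRegression dimensionality/lambda constructor are assumptions about the surrounding mlpack 2.x API rather than part of this commit.

    #include <mlpack/core.hpp>
    #include <mlpack/methods/logistic_regression/logistic_regression.hpp>
    #include <mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp>

    using namespace mlpack;
    using namespace mlpack::regression;
    using namespace mlpack::optimization;

    int main()
    {
      // Synthetic stand-in for a real training set: 10-dimensional points
      // stored one per column, with a 0/1 label per point.
      arma::mat regressors(10, 1000, arma::fill::randu);
      arma::Row<size_t> responses(1000);
      for (size_t i = 0; i < responses.n_elem; ++i)
        responses[i] = (regressors(0, i) > 0.5) ? 1 : 0;

      // Objective function over the training data; the last argument is the
      // L2 regularization parameter (lambda).
      LogisticRegressionFunction<> lrf(regressors, responses, 0.0);

      // Configure mini-batch SGD just as the command-line program does,
      // using the documented defaults of --batch_size, --step_size,
      // --tolerance, and --max_iterations.
      MiniBatchSGD<LogisticRegressionFunction<>> mbsgdOpt(lrf);
      mbsgdOpt.BatchSize() = 50;
      mbsgdOpt.StepSize() = 0.01;
      mbsgdOpt.Tolerance() = 1e-10;
      mbsgdOpt.MaxIterations() = 10000;

      // Train the model with the configured optimizer (the dimensionality/
      // lambda constructor here is an assumption).
      LogisticRegression<> model(regressors.n_rows, 0.0);
      model.Train(mbsgdOpt);

      return 0;
    }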



