[mlpack-svn] r13802 - mlpack/trunk/src/mlpack/methods/nca

Wed Oct 31 15:54:06 EDT 2012

Author: rcurtin
Date: 2012-10-31 15:54:06 -0400 (Wed, 31 Oct 2012)
New Revision: 13802

Modified:
   mlpack/trunk/src/mlpack/methods/nca/nca.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
Log:
Implement decomposable error function and gradient, then make NCA use SGD and
not L-BFGS.  That's a lot of acronyms...  for now, parameters to SGD are
specifiable through the NCA interface, but I think in the long run NCA should be
templatized to use an arbitrary optimizer.


Modified: mlpack/trunk/src/mlpack/methods/nca/nca.hpp
===================================================================

--- mlpack/trunk/src/mlpack/methods/nca/nca.hpp	2012-10-31 19:18:53 UTC (rev 13801)
+++ mlpack/trunk/src/mlpack/methods/nca/nca.hpp	2012-10-31 19:54:06 UTC (rev 13802)
@@ -36,17 +36,27 @@
  * }
  * @endcode
  */
-template<typename Kernel>
+template<typename MetricType>
 class NCA
 {
  public:
   /**
    * Construct the Neighborhood Components Analysis object.  This simply stores
-   * the reference to the dataset, before the actual optimization is performed.
+   * the reference to the dataset and labels as well as the parameters for
+   * optimization before the actual optimization is performed.
    *
    * @param dataset Input dataset.
+   * @param labels Input dataset labels.
+   * @param stepSize Step size for stochastic gradient descent.
+   * @param maxIterations Maximum iterations for stochastic gradient descent.
+   * @param tolerance Tolerance for termination of stochastic gradient descent.
    */
-  NCA(const arma::mat& dataset, const arma::uvec& labels);
+  NCA(const arma::mat& dataset,
+      const arma::uvec& labels,
+      const double stepSize = 0.01,
+      const size_t maxIterations = 500000,
+      const double tolerance = 1e-5,
+      MetricType metric = MetricType());
 
   /**
    * Perform Neighborhood Components Analysis.  The output distance learning
@@ -56,9 +66,41 @@
    */
   void LearnDistance(arma::mat& outputMatrix);
 
+  //! Get the dataset reference.
+  const arma::mat& Dataset() const { return dataset; }
+  //! Get the labels reference.
+  const arma::uvec& Labels() const { return labels; }
+
+  //! Get the step size for stochastic gradient descent.
+  double StepSize() const { return stepSize; }
+  //! Modify the step size for stochastic gradient descent.
+  double& StepSize() { return stepSize; }
+
+  //! Get the maximum number of iterations for stochastic gradient descent.
+  size_t MaxIterations() const { return maxIterations; }
+  //! Modify the maximum number of iterations for stochastic gradient descent.
+  size_t& MaxIterations() { return maxIterations; }
+
+  //! Get the tolerance for the termination of stochastic gradient descent.
+  double Tolerance() const { return tolerance; }
+  //! Modify the tolerance for the termination of stochastic gradient descent.
+  double& Tolerance() { return tolerance; }
+
  private:
+  //! Dataset reference.
   const arma::mat& dataset;
+  //! Labels reference.
   const arma::uvec& labels;
+
+  //! Metric to be used.
+  MetricType metric;
+
+  //! Step size for stochastic gradient descent.
+  double stepSize;
+  //! Maximum iterations for stochastic gradient descent.
+  size_t maxIterations;
+  //! Tolerance for termination of stochastic gradient descent.
+  double tolerance;
 };
 
 }; // namespace nca

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp	2012-10-31 19:18:53 UTC (rev 13801)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp	2012-10-31 19:54:06 UTC (rev 13802)
@@ -10,7 +10,7 @@
 // In case it was not already included.
 #include "nca.hpp"
 
-#include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>
+#include <mlpack/core/optimizers/sgd/sgd.hpp>
 
 #include "nca_softmax_error_function.hpp"
 
@@ -18,25 +18,37 @@
 namespace nca {
 
 // Just set the internal matrix reference.
-template<typename Kernel>
-NCA<Kernel>::NCA(const arma::mat& dataset, const arma::uvec& labels) :
-    dataset(dataset), labels(labels) { /* nothing to do */ }
+template<typename MetricType>
+NCA<MetricType>::NCA(const arma::mat& dataset,
+                     const arma::uvec& labels,
+                     const double stepSize,
+                     const size_t maxIterations,
+                     const double tolerance,
+                     MetricType metric) :
+    dataset(dataset),
+    labels(labels),
+    metric(metric),
+    stepSize(stepSize),
+    maxIterations(maxIterations),
+    tolerance(tolerance)
+{ /* Nothing to do. */ }
 
-template<typename Kernel>
-void NCA<Kernel>::LearnDistance(arma::mat& outputMatrix)
+template<typename MetricType>
+void NCA<MetricType>::LearnDistance(arma::mat& outputMatrix)
 {
   outputMatrix = arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
 
-  SoftmaxErrorFunction<Kernel> errorFunc(dataset, labels);
+  SoftmaxErrorFunction<MetricType> errorFunc(dataset, labels, metric);
 
-  // We will use the L-BFGS optimizer to optimize the stretching matrix.
-  optimization::L_BFGS<SoftmaxErrorFunction<Kernel> > lbfgs(errorFunc, 10);
+  // We will use stochastic gradient descent to optimize the NCA error function.
+  optimization::SGD<SoftmaxErrorFunction<MetricType> > sgd(errorFunc, stepSize,
+      maxIterations, tolerance);
 
-  Timer::Start("nca_lbfgs_optimization");
+  Timer::Start("nca_sgd_optimization");
 
-  lbfgs.Optimize(outputMatrix);
+  sgd.Optimize(outputMatrix);
 
-  Timer::Stop("nca_lbfgs_optimization");
+  Timer::Stop("nca_sgd_optimization");
 }
 
 }; // namespace nca

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp	2012-10-31 19:18:53 UTC (rev 13801)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp	2012-10-31 19:54:06 UTC (rev 13802)
@@ -61,6 +61,18 @@
   double Evaluate(const arma::mat& covariance);
 
   /**
+   * Evaluate the softmax objective function for the given covariance matrix on
+   * only one point of the dataset.  This is the separable implementation, where
+   * the objective function is decomposed into the sum of many objective
+   * functions, and here, only one of those constituent objective functions is
+   * returned.
+   *
+   * @param covariance Covariance matrix of Mahalanobis distance.
+   * @param i Index of point to use for objective function.
+   */
+  double Evaluate(const arma::mat& covariance, const size_t i);
+
+  /**
    * Evaluate the gradient of the softmax function for the given covariance
    * matrix.  This is the non-separable implementation, where the objective
    * function is not decomposed into the sum of several objective functions.
@@ -71,10 +83,31 @@
   void Gradient(const arma::mat& covariance, arma::mat& gradient);
 
   /**
+   * Evaluate the gradient of the softmax function for the given covariance
+   * matrix on only one point of the dataset.  This is the separable
+   * implementation, where the objective function is decomposed into the sum of
+   * many objective functions, and here, only one of those constituent objective
+   * functions is returned.
+   *
+   * @param covariance Covariance matrix of Mahalanobis distance.
+   * @param i Index of point to use for objective function.
+   * @param gradient Matrix to store the calculated gradient in.
+   */
+  void Gradient(const arma::mat& covariance,
+                const size_t i,
+                arma::mat& gradient);
+
+  /**
    * Get the initial point.
    */
   const arma::mat GetInitialPoint() const;
 
+  /**
+   * Get the number of functions the objective function can be decomposed into.
+   * This is just the number of points in the dataset.
+   */
+  size_t NumFunctions() const { return dataset.n_cols; }
+
  private:
   const arma::mat& dataset;
   const arma::uvec& labels;
@@ -83,7 +116,7 @@
 
   //! Last coordinates.  Used for the non-separable Evaluate() and Gradient().
   arma::mat lastCoordinates;
-  //! Stretched dataset.  Used for the non-separable Evaluate() and Gradient().
+  //! Stretched dataset.  Kept internal to avoid memory reallocations.
   arma::mat stretchedDataset;
   //! Holds calculated p_i, for the non-separable Evaluate() and Gradient().
   arma::vec p;

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp	2012-10-31 19:18:53 UTC (rev 13801)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp	2012-10-31 19:54:06 UTC (rev 13802)
@@ -24,6 +24,7 @@
   precalculated(false)
 { /* nothing to do */ }
 
+//! The non-separable implementation, which uses Precalculate() to save time.
 template<typename MetricType>
 double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates)
 {
@@ -31,10 +32,53 @@
   Precalculate(coordinates);
 
   return -accu(p); // Sum of p_i for all i.  We negate because our solver
-                    // minimizes, not maximizes.
+                   // minimizes, not maximizes.
 };
 
+//! The separated objective function, which does not use Precalculate().
 template<typename MetricType>
+double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates,
+                                                  const size_t i)
+{
+  // Unfortunately each evaluation will take O(N) time because it requires a
+  // scan over all points in the dataset.  Our objective is to compute p_ij.
+  double denominator = 0;
+  double numerator = 0;
+
+  // It's quicker to do this now than one point at a time later.
+  stretchedDataset = coordinates * dataset;
+
+  for (size_t k = 0; k < dataset.n_cols; ++k)
+  {
+    // Don't consider the case where the points are the same.
+    if (k == i)
+      continue;
+
+    // We want to evaluate exp(-D(A x_i, A x_k)).
+    double eval = std::exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                            stretchedDataset.unsafe_col(k)));
+
+    // If they are in the same
+    if (labels[i] == labels[k])
+      numerator += eval;
+
+    denominator += eval;
+  }
+
+  // Now the result is just a simple division, but we have to be sure that the
+  // denominator is not 0.
+  if (denominator == 0.0)
+  {
+    Log::Warn << "Denominator of p_" << i << " is 0!" << std::endl;
+    return 0;
+  }
+  else
+    return -(numerator / denominator); // Negate because the optimizer is a
+                                       // minimizer.
+}
+
+//! The non-separable implementation, where Precalculate() is used.
+template<typename MetricType>
 void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
                                                 arma::mat& gradient)
 {
@@ -81,7 +125,66 @@
   gradient = -2 * coordinates * sum;
 }
 
+//! The separable implementation.
 template<typename MetricType>
+void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
+                                                const size_t i,
+                                                arma::mat& gradient)
+{
+  // We will need to calculate p_i before this evaluation is done, so these two
+  // variables will hold the information necessary for that.
+  double numerator = 0;
+  double denominator = 0;
+
+  // The gradient involves two matrix terms which are eventually combined into
+  // one.
+  arma::mat firstTerm(coordinates.n_rows, coordinates.n_cols);
+  arma::mat secondTerm(coordinates.n_rows, coordinates.n_cols);
+
+  // Compute the stretched dataset.
+  stretchedDataset = coordinates * dataset;
+
+  for (size_t k = 0; k < dataset.n_cols; ++k)
+  {
+    // Don't consider the case where the points are the same.
+    if (i == k)
+      continue;
+
+    // Calculate p_ik.
+    double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                       stretchedDataset.unsafe_col(k)));
+
+    // If the points are in the same class, we must add to the second term of
+    // the gradient as well as the numerator of p_i.
+    if (labels[i] == labels[k])
+    {
+      numerator += eval;
+      secondTerm += eval *
+          (stretchedDataset.col(i) - stretchedDataset.col(k)) *
+          arma::trans(stretchedDataset.col(i) - stretchedDataset.col(k));
+    }
+
+    // We always have to add to the denominator of p_i and the first term of the
+    // gradient computation.
+    denominator += eval;
+    firstTerm += eval *
+        (stretchedDataset.col(i) - stretchedDataset.col(k)) *
+        arma::trans(stretchedDataset.col(i) - stretchedDataset.col(k));
+  }
+
+  // Calculate p_i.
+  double p = 0;
+  if (denominator == 0)
+    Log::Warn << "Denominator of p_" << i << " is 0!" << std::endl;
+  else
+    p = numerator / denominator;
+
+  // Now multiply the first term by p_i, and add the two together and multiply
+  // all by 2 * A.  We negate it though, because our optimizer is a minimizer.
+  gradient = -2 * coordinates * (p * firstTerm - secondTerm);
+}
+
+template<typename MetricType>
 const arma::mat SoftmaxErrorFunction<MetricType>::GetInitialPoint() const
 {
   return arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
@@ -117,7 +220,7 @@
     {
       // Evaluate exp(-d(x_i, x_j)).
       double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                          stretchedDataset.unsafe_col(j)));
+                                         stretchedDataset.unsafe_col(j)));
 
       // Add this to the denominators of both i and j: p_ij = p_ji.
       denominators[i] += eval;