[mlpack-svn] r13800 - mlpack/trunk/src/mlpack/methods/nca

Wed Oct 31 15:03:24 EDT 2012

Author: rcurtin
Date: 2012-10-31 15:03:24 -0400 (Wed, 31 Oct 2012)
New Revision: 13800

Modified:
   mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
Log:
Fix formatting issues and prepare for new functions.


Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
===================================================================

--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp	2012-10-31 18:13:08 UTC (rev 13799)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp	2012-10-31 19:03:24 UTC (rev 13800)
@@ -24,11 +24,16 @@
  * where x_n represents a point and A is the current scaling matrix.
  *
  * This class is more flexible than the original paper, allowing an arbitrary
- * kernel function to be used, meaning that the Mahalanobis distance is not the
- * only allowed way to run NCA.  However, the Mahalanobis distance is probably
- * the best way to use this.
+ * metric function to be used in place of || A x_i - A x_j ||^2, meaning that
+ * the squared Euclidean distance is not the only allowed metric for NCA.
+ * However, that is probably the best way to use this class.
+ *
+ * In addition to the standard Evaluate() and Gradient() functions which MLPACK
+ * optimizers use, overloads of Evaluate() and Gradient() are given which only
+ * operate on one point in the dataset.  This is useful for optimizers like
+ * stochastic gradient descent (see mlpack::optimization::SGD).
  */
-template<typename Kernel>
+template<typename MetricType = metric::SquaredEuclideanDistance>
 class SoftmaxErrorFunction
 {
  public:
@@ -44,10 +49,12 @@
    */
   SoftmaxErrorFunction(const arma::mat& dataset,
                        const arma::uvec& labels,
-                       Kernel kernel = Kernel());
+                       MetricType metric = MetricType());
 
   /**
-   * Evaluate the softmax function for the given covariance matrix.
+   * Evaluate the softmax function for the given covariance matrix.  This is the
+   * non-separable implementation, where the objective function is not
+   * decomposed into the sum of several objective functions.
    *
    * @param covariance Covariance matrix of Mahalanobis distance.
    */
@@ -55,7 +62,8 @@
 
   /**
    * Evaluate the gradient of the softmax function for the given covariance
-   * matrix.
+   * matrix.  This is the non-separable implementation, where the objective
+   * function is not decomposed into the sum of several objective functions.
    *
    * @param covariance Covariance matrix of Mahalanobis distance.
    * @param gradient Matrix to store the calculated gradient in.
@@ -68,23 +76,29 @@
   const arma::mat GetInitialPoint() const;
 
  private:
-  const arma::mat& dataset_;
-  const arma::uvec& labels_;
+  const arma::mat& dataset;
+  const arma::uvec& labels;
 
-  Kernel kernel_;
+  MetricType metric;
 
-  arma::mat last_coordinates_;
-  arma::mat stretched_dataset_;
-  arma::vec p_; // Holds calculated p_i.
-  arma::vec denominators_; // Holds denominators for calculation of p_ij.
+  //! Last coordinates.  Used for the non-separable Evaluate() and Gradient().
+  arma::mat lastCoordinates;
+  //! Stretched dataset.  Used for the non-separable Evaluate() and Gradient().
+  arma::mat stretchedDataset;
+  //! Holds calculated p_i, for the non-separable Evaluate() and Gradient().
+  arma::vec p;
+  //! Holds denominators for calculation of p_ij, for the non-separable
+  //! Evaluate() and Gradient().
+  arma::vec denominators;
 
-  //! False is nothing has ever been precalculated (only at construction time).
-  bool precalculated_;
+  //! False if nothing has ever been precalculated (only at construction time).
+  bool precalculated;
 
   /**
    * Precalculate the denominators and numerators that will make up the p_ij,
    * but only if the coordinates matrix is different than the last coordinates
-   * the Precalculate() method was run with.
+   * the Precalculate() method was run with.  This method is only called by the
+   * non-separable Evaluate() and Gradient().
    *
    * This will update last_coordinates_ and stretched_dataset_, and also
    * calculate the p_i and denominators_ which are used in the calculation of

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp	2012-10-31 18:13:08 UTC (rev 13799)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp	2012-10-31 19:03:24 UTC (rev 13800)
@@ -14,28 +14,29 @@
 namespace nca {
 
 // Initialize with the given kernel.
-template<typename Kernel>
-SoftmaxErrorFunction<Kernel>::SoftmaxErrorFunction(const arma::mat& dataset,
-                                                   const arma::uvec& labels,
-                                                   Kernel kernel) :
-  dataset_(dataset), labels_(labels), kernel_(kernel),
-  last_coordinates_(dataset.n_rows, dataset.n_rows),
-  precalculated_(false)
+template<typename MetricType>
+SoftmaxErrorFunction<MetricType>::SoftmaxErrorFunction(const arma::mat& dataset,
+                                                       const arma::uvec& labels,
+                                                       MetricType metric) :
+  dataset(dataset),
+  labels(labels),
+  metric(metric),
+  precalculated(false)
 { /* nothing to do */ }
 
-template<typename Kernel>
-double SoftmaxErrorFunction<Kernel>::Evaluate(const arma::mat& coordinates)
+template<typename MetricType>
+double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates)
 {
   // Calculate the denominators and numerators, if necessary.
   Precalculate(coordinates);
 
-  return -accu(p_); // Sum of p_i for all i.  We negate because our solver
+  return -accu(p); // Sum of p_i for all i.  We negate because our solver
                     // minimizes, not maximizes.
 };
 
-template<typename Kernel>
-void SoftmaxErrorFunction<Kernel>::Gradient(const arma::mat& coordinates,
-                                            arma::mat& gradient)
+template<typename MetricType>
+void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
+                                                arma::mat& gradient)
 {
   // Calculate the denominators and numerators, if necessary.
   Precalculate(coordinates);
@@ -53,26 +54,26 @@
   //   otherwise, add
   //     (p_i p_ik + p_k p_ki) x_ik x_ik^T
   arma::mat sum;
-  sum.zeros(stretched_dataset_.n_rows, stretched_dataset_.n_rows);
-  for (size_t i = 0; i < stretched_dataset_.n_cols; i++)
+  sum.zeros(stretchedDataset.n_rows, stretchedDataset.n_rows);
+  for (size_t i = 0; i < stretchedDataset.n_cols; i++)
   {
-    for (size_t k = (i + 1); k < stretched_dataset_.n_cols; k++)
+    for (size_t k = (i + 1); k < stretchedDataset.n_cols; k++)
     {
       // Calculate p_ik and p_ki first.
-      double eval = exp(-kernel_.Evaluate(stretched_dataset_.unsafe_col(i),
-                                          stretched_dataset_.unsafe_col(k)));
+      double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                         stretchedDataset.unsafe_col(k)));
       double p_ik = 0, p_ki = 0;
-      p_ik = eval / denominators_(i);
-      p_ki = eval / denominators_(k);
+      p_ik = eval / denominators(i);
+      p_ki = eval / denominators(k);
 
       // Subtract x_i from x_k.  We are not using stretched points here.
-      arma::vec x_ik = dataset_.col(i) - dataset_.col(k);
-      arma::mat second_term = (x_ik * trans(x_ik));
+      arma::vec x_ik = dataset.col(i) - dataset.col(k);
+      arma::mat secondTerm = (x_ik * trans(x_ik));
 
-      if (labels_[i] == labels_[k])
-        sum += ((p_[i] - 1) * p_ik + (p_[k] - 1) * p_ki) * second_term;
+      if (labels[i] == labels[k])
+        sum += ((p[i] - 1) * p_ik + (p[k] - 1) * p_ki) * secondTerm;
       else
-        sum += (p_[i] * p_ik + p_[k] * p_ki) * second_term;
+        sum += (p[i] * p_ik + p[k] * p_ki) * secondTerm;
     }
   }
 
@@ -80,23 +81,27 @@
   gradient = -2 * coordinates * sum;
 }
 
-template<typename Kernel>
-const arma::mat SoftmaxErrorFunction<Kernel>::GetInitialPoint() const
+template<typename MetricType>
+const arma::mat SoftmaxErrorFunction<MetricType>::GetInitialPoint() const
 {
-  return arma::eye<arma::mat>(dataset_.n_rows, dataset_.n_rows);
+  return arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
 }
 
-template<typename Kernel>
-void SoftmaxErrorFunction<Kernel>::Precalculate(const arma::mat& coordinates)
+template<typename MetricType>
+void SoftmaxErrorFunction<MetricType>::Precalculate(
+    const arma::mat& coordinates)
 {
+  // Ensure it is the right size.
+  lastCoordinates.set_size(coordinates.n_rows, coordinates.n_cols);
+
   // Make sure the calculation is necessary.
-  if ((accu(coordinates == last_coordinates_) == coordinates.n_elem) &&
-      precalculated_)
+  if ((accu(coordinates == lastCoordinates) == coordinates.n_elem) &&
+      precalculated)
     return; // No need to calculate; we already have this stuff saved.
 
   // Coordinates are different; save the new ones, and stretch the dataset.
-  last_coordinates_ = coordinates;
-  stretched_dataset_ = coordinates * dataset_;
+  lastCoordinates = coordinates;
+  stretchedDataset = coordinates * dataset;
 
   // For each point i, we must evaluate the softmax function:
   //   p_ij = exp( -K(x_i, x_j) ) / ( sum_{k != i} ( exp( -K(x_i, x_k) )))
@@ -104,47 +109,47 @@
   // We will do this by keeping track of the denominators for each i as well as
   // the numerators (the sum for all j in class of i).  This will be on the
   // order of O((n * (n + 1)) / 2), which really isn't all that great.
-  p_.zeros(stretched_dataset_.n_cols);
-  denominators_.zeros(stretched_dataset_.n_cols);
-  for (size_t i = 0; i < stretched_dataset_.n_cols; i++)
+  p.zeros(stretchedDataset.n_cols);
+  denominators.zeros(stretchedDataset.n_cols);
+  for (size_t i = 0; i < stretchedDataset.n_cols; i++)
   {
-    for (size_t j = (i + 1); j < stretched_dataset_.n_cols; j++)
+    for (size_t j = (i + 1); j < stretchedDataset.n_cols; j++)
     {
-      // Evaluate exp(-K(x_i, x_j)).
-      double eval = exp(-kernel_.Evaluate(stretched_dataset_.unsafe_col(i),
-                                          stretched_dataset_.unsafe_col(j)));
+      // Evaluate exp(-d(x_i, x_j)).
+      double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                          stretchedDataset.unsafe_col(j)));
 
       // Add this to the denominators of both i and j: p_ij = p_ji.
-      denominators_[i] += eval;
-      denominators_[j] += eval;
+      denominators[i] += eval;
+      denominators[j] += eval;
 
       // If i and j are the same class, add to numerator of both.
-      if (labels_[i] == labels_[j])
+      if (labels[i] == labels[j])
       {
-        p_[i] += eval;
-        p_[j] += eval;
+        p[i] += eval;
+        p[j] += eval;
       }
     }
   }
 
   // Divide p_i by their denominators.
-  p_ /= denominators_;
+  p /= denominators;
 
   // Clean up any bad values.
-  for (size_t i = 0; i < stretched_dataset_.n_cols; i++)
+  for (size_t i = 0; i < stretchedDataset.n_cols; i++)
   {
-    if (denominators_[i] == 0.0)
+    if (denominators[i] == 0.0)
     {
       Log::Debug << "Denominator of p_{" << i << ", j} is 0." << std::endl;
 
       // Set to usable values.
-      denominators_[i] = std::numeric_limits<double>::infinity();
-      p_[i] = 0;
+      denominators[i] = std::numeric_limits<double>::infinity();
+      p[i] = 0;
     }
   }
 
   // We've done a precalculation.  Mark it as done.
-  precalculated_ = true;
+  precalculated = true;
 }
 
 }; // namespace nca