[mlpack-svn] r13804 - mlpack/trunk/src/mlpack/methods/nca

fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Oct 31 16:50:28 EDT 2012


Author: rcurtin
Date: 2012-10-31 16:50:28 -0400 (Wed, 31 Oct 2012)
New Revision: 13804

Modified:
   mlpack/trunk/src/mlpack/methods/nca/nca.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
   mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
Log:
Allow normalization of distances between points, to prevent underflows.
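
For context, the underflow in question comes from evaluating exp(-d) for a large pairwise distance d: in IEEE double precision, exp(-x) goes subnormal once x passes about 709 and becomes exactly zero near x = 746, which is why 700 is used as the cutoff in the hunks below.  A standalone sketch of the problem and of the effect of dividing by a normalization constant (this is not mlpack code; the names are illustrative only):

#include <cmath>
#include <cstdio>

int main()
{
  const double dist = 1200.0;                       // A large pairwise distance.
  const double normalizationConstant = 1.5 * dist;  // Same choice as the hunks below.

  // Without normalization, this underflows to exactly 0.
  std::printf("exp(-dist)     = %g\n", std::exp(-dist));

  // Normalized, the value is small but still representable.
  std::printf("exp(-dist / c) = %g\n", std::exp(-dist / normalizationConstant));

  return 0;
}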


Modified: mlpack/trunk/src/mlpack/methods/nca/nca.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca.hpp	2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca.hpp	2012-10-31 20:50:28 UTC (rev 13804)
@@ -43,19 +43,26 @@
   /**
    * Construct the Neighborhood Components Analysis object.  This simply stores
    * the reference to the dataset and labels as well as the parameters for
-   * optimization before the actual optimization is performed.
+   * optimization before the actual optimization is performed.  In cases where
+   * points in the dataset are far apart (distances over 700 or so), some
+   * calculations will underflow; in that case, normalizeDistances should be
+   * set to true (the default).  Setting it to false gives a minor speed gain.
    *
    * @param dataset Input dataset.
    * @param labels Input dataset labels.
    * @param stepSize Step size for stochastic gradient descent.
    * @param maxIterations Maximum iterations for stochastic gradient descent.
    * @param tolerance Tolerance for termination of stochastic gradient descent.
+   * @param normalizeDistances Whether or not distances should be normalized;
+   *     this is useful when the points in the dataset are far apart.
+   * @param metric Instantiated metric type.
    */
   NCA(const arma::mat& dataset,
       const arma::uvec& labels,
       const double stepSize = 0.01,
       const size_t maxIterations = 500000,
       const double tolerance = 1e-5,
+      const bool normalizeDistances = true,
       MetricType metric = MetricType());
 
   /**
@@ -101,6 +108,8 @@
   size_t maxIterations;
   //! Tolerance for termination of stochastic gradient descent.
   double tolerance;
+  //! Whether or not distances should be normalized in the error function.
+  bool normalizeDistances;
 };
 
 }; // namespace nca
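
As a quick illustration of the interface change above, the updated constructor might be called as in the following sketch (this is not part of the commit; LearnMetric() is a hypothetical helper, and the dataset and labels are assumed to be filled in elsewhere):

#include <mlpack/methods/nca/nca.hpp>

using namespace mlpack::nca;
using namespace mlpack::metric;

// Hypothetical helper: learn the linear transformation with distance
// normalization left at its default of true.  Pass false only when all
// pairwise distances are known to stay well under 700.
void LearnMetric(const arma::mat& dataset, const arma::uvec& labels,
                 arma::mat& distance)
{
  NCA<LMetric<2> > nca(dataset, labels, 0.01 /* stepSize */,
      500000 /* maxIterations */, 1e-5 /* tolerance */,
      true /* normalizeDistances */);

  nca.LearnDistance(distance);
}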

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp	2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp	2012-10-31 20:50:28 UTC (rev 13804)
@@ -24,13 +24,15 @@
                      const double stepSize,
                      const size_t maxIterations,
                      const double tolerance,
+                     const bool normalizeDistances,
                      MetricType metric) :
     dataset(dataset),
     labels(labels),
     metric(metric),
     stepSize(stepSize),
     maxIterations(maxIterations),
-    tolerance(tolerance)
+    tolerance(tolerance),
+    normalizeDistances(normalizeDistances)
 { /* Nothing to do. */ }
 
 template<typename MetricType>
@@ -38,11 +40,12 @@
 {
   outputMatrix = arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
 
-  SoftmaxErrorFunction<MetricType> errorFunc(dataset, labels, metric);
+  SoftmaxErrorFunction<MetricType> errorFunc(dataset, labels,
+      normalizeDistances, metric);
 
   // We will use stochastic gradient descent to optimize the NCA error function.
   optimization::SGD<SoftmaxErrorFunction<MetricType> > sgd(errorFunc, stepSize,
-      maxIterations, tolerance);
+      maxIterations, tolerance, normalizeDistances);
 
   Timer::Start("nca_sgd_optimization");
 

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp	2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp	2012-10-31 20:50:28 UTC (rev 13804)
@@ -33,6 +33,8 @@
     "gradient descent (0 indicates no limit).", "n", 500000);
 PARAM_DOUBLE("tolerance", "Maximum tolerance for termination of stochastic "
     "gradient descent.", "t", 1e-7);
+PARAM_FLAG("no_normalization", "Do not normalize distances (this should not be"
+    " set if squared distances between points are greater than 700).", "N");
 
 using namespace mlpack;
 using namespace mlpack::nca;
@@ -52,6 +54,7 @@
   const double stepSize = CLI::GetParam<double>("step_size");
   const size_t maxIterations = CLI::GetParam<int>("max_iterations");
   const double tolerance = CLI::GetParam<double>("tolerance");
+  const bool normalize = !CLI::HasParam("no_normalization");
 
   // Load data.
   mat data;
@@ -79,7 +82,7 @@
 
   // Now create the NCA object and run the optimization.
   NCA<LMetric<2> > nca(data, labels.unsafe_col(0), stepSize, maxIterations,
-      tolerance);
+      tolerance, normalize);
 
   mat distance;
   nca.LearnDistance(distance);

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp	2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp	2012-10-31 20:50:28 UTC (rev 13804)
@@ -42,6 +42,9 @@
    * store, which is set elsewhere.  If no kernel is given, an empty kernel is
    * used; this way, you can call the constructor with no arguments.  A
    * reference to the dataset we will be optimizing over is also required.
+   * Optionally, the distances between points can be normalized, to prevent
+   * underflows.  If all of the points in your dataset are relatively close
+   * (distances less than 700 or so), underflow will not occur.
    *
    * @param dataset Matrix containing the dataset.
    * @param labels Vector of class labels for each point in the dataset.
@@ -49,6 +52,7 @@
    */
   SoftmaxErrorFunction(const arma::mat& dataset,
                        const arma::uvec& labels,
+                       const bool normalizeDistances = true,
                        MetricType metric = MetricType());
 
   /**
@@ -109,11 +113,19 @@
   size_t NumFunctions() const { return dataset.n_cols; }
 
  private:
+  //! Reference to the dataset.
   const arma::mat& dataset;
+  //! Reference to the labels for the dataset.
   const arma::uvec& labels;
 
+  //! The instantiated metric to use.
   MetricType metric;
 
+  //! Whether or not to normalize the distances to 1.
+  bool normalizeDistances;
+  //! The normalization constant, used if normalizeDistances == true.
+  double normalizationConstant;
+
   //! Last coordinates.  Used for the non-separable Evaluate() and Gradient().
   arma::mat lastCoordinates;
   //! Stretched dataset.  Kept internal to avoid memory reallocations.

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp	2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp	2012-10-31 20:50:28 UTC (rev 13804)
@@ -15,14 +15,19 @@
 
 // Initialize with the given kernel.
 template<typename MetricType>
-SoftmaxErrorFunction<MetricType>::SoftmaxErrorFunction(const arma::mat& dataset,
-                                                       const arma::uvec& labels,
-                                                       MetricType metric) :
-  dataset(dataset),
-  labels(labels),
-  metric(metric),
-  precalculated(false)
-{ /* nothing to do */ }
+SoftmaxErrorFunction<MetricType>::SoftmaxErrorFunction(
+    const arma::mat& dataset,
+    const arma::uvec& labels,
+    const bool normalizeDistances,
+    MetricType metric) :
+    dataset(dataset),
+    labels(labels),
+    metric(metric),
+    normalizeDistances(normalizeDistances),
+    normalizationConstant(100.0), // Just start at some number.
+    precalculated(false)
+{
+}
 
 //! The non-separable implementation, which uses Precalculate() to save time.
 template<typename MetricType>
@@ -55,9 +60,26 @@
       continue;
 
     // We want to evaluate exp(-D(A x_i, A x_k)).
-    double eval = std::exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                            stretchedDataset.unsafe_col(k)));
+    double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                  stretchedDataset.unsafe_col(k));
 
+    // exp(-750) underflows.  So let's take action if the distance goes over
+    // 700.
+    if (normalizeDistances && ((dist / normalizationConstant) > 700))
+    {
+      Log::Warn << "Normalized distance between points " << i << " and " << k
+          << " is greater than limit of 700 (" << (dist / normalizationConstant)
+          << ").  Renormalizing..." << std::endl;
+      normalizationConstant = (1.5 * dist);
+
+      // We must re-call Evaluate() recursively since the normalization has
+      // changed.  This may do weird things to the objective function, but it
+      // should still optimize okay.
+      return Evaluate(coordinates, i);
+    }
+
+    double eval = std::exp(-dist);
+
     // If they are in the same class, add this to the numerator.
     if (labels[i] == labels[k])
       numerator += eval;
@@ -104,8 +126,27 @@
     for (size_t k = (i + 1); k < stretchedDataset.n_cols; k++)
     {
       // Calculate p_ik and p_ki first.
-      double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                         stretchedDataset.unsafe_col(k)));
+      double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                    stretchedDataset.unsafe_col(k));
+
+      // exp(-750) underflows.  So let's take action if the distance goes over
+      // 700.
+      if (normalizeDistances && ((dist / normalizationConstant) > 700))
+      {
+        Log::Warn << "Normalized distance between points " << i << " and " << k
+            << " is greater than limit of 700 ("
+            << (dist / normalizationConstant) << ").  Renormalizing...\n";
+        normalizationConstant = (1.5 * dist);
+
+        // We must re-call Gradient() recursively since the normalization has
+        // changed.  This may do weird things to the objective function, but it
+        // should still optimize okay.
+        Gradient(coordinates, gradient);
+        return;
+      }
+
+      double eval = exp(-dist);
+
       double p_ik = 0, p_ki = 0;
       p_ik = eval / denominators(i);
       p_ki = eval / denominators(k);
@@ -151,9 +192,27 @@
       continue;
 
     // Calculate p_ik.
-    double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                       stretchedDataset.unsafe_col(k)));
+    double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                  stretchedDataset.unsafe_col(k));
 
+    // exp(-750) underflows.  So let's take action if the distance goes over
+    // 700.
+    if (normalizeDistances && ((dist / normalizationConstant) > 700))
+    {
+      Log::Warn << "Normalized distance between points " << i << " and " << k
+          << " is greater than limit of 700 (" << (dist / normalizationConstant)
+          << ").  Renormalizing..." << std::endl;
+      normalizationConstant = (1.5 * dist);
+
+      // We must re-call Gradient() recursively since the normalization has
+      // changed.  This may do weird things to the objective function, but it
+      // should still optimize okay.
+      Gradient(coordinates, i, gradient);
+      return;
+    }
+
+    double eval = exp(-dist);
+
     // If the points are in the same class, we must add to the second term of
     // the gradient as well as the numerator of p_i.
     if (labels[i] == labels[k])
@@ -198,8 +257,8 @@
   lastCoordinates.set_size(coordinates.n_rows, coordinates.n_cols);
 
   // Make sure the calculation is necessary.
-  if ((accu(coordinates == lastCoordinates) == coordinates.n_elem) &&
-      precalculated)
+  if (precalculated &&
+      (accu(coordinates == lastCoordinates) == coordinates.n_elem))
     return; // No need to calculate; we already have this stuff saved.
 
   // Coordinates are different; save the new ones, and stretch the dataset.
@@ -219,9 +278,28 @@
     for (size_t j = (i + 1); j < stretchedDataset.n_cols; j++)
     {
       // Evaluate exp(-d(x_i, x_j)).
-      double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                         stretchedDataset.unsafe_col(j)));
+      double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                    stretchedDataset.unsafe_col(j));
 
+      // exp(-750) underflows.  So let's take action if the distance goes over
+      // 700.
+      if (normalizeDistances && ((dist / normalizationConstant) > 700))
+      {
+        Log::Warn << "Normalized distance between points " << i << " and " << j
+            << " is greater than limit of 700 ("
+            << (dist / normalizationConstant) << ").  Renormalizing...\n";
+        normalizationConstant = (1.5 * dist);
+
+        // We must re-call Gradient() recursively since the normalization has
+        // changed.  This may do weird things to the objective function, but it
+        // should still optimize okay.
+        precalculated = false;
+        Precalculate(coordinates);
+        return;
+      }
+
+      double eval = std::exp(-dist);
+
       // Add this to the denominators of both i and j: p_ij = p_ji.
       denominators[i] += eval;
       denominators[j] += eval;
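
To summarize the renormalization strategy used in all four hunks above: whenever a normalized distance would push exp() toward underflow, the constant is bumped to 1.5 times the offending distance and the computation is restarted.  A compact standalone sketch of that idea, assuming the quantity fed to exp() is the normalized distance (this is not mlpack code; the function name is illustrative only):

#include <cmath>

// Sketch: evaluate exp(-dist / c), growing c whenever the normalized
// distance gets close to the ~746 point where exp() underflows to zero.
double SafeExpNegative(const double dist, double& normalizationConstant)
{
  if ((dist / normalizationConstant) > 700)
  {
    // Mirror the choice in this commit: the new constant makes the
    // normalized distance about 0.67, comfortably inside exp()'s range.
    normalizationConstant = 1.5 * dist;
  }

  return std::exp(-dist / normalizationConstant);
}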



