[mlpack-svn] r13806 - mlpack/trunk/src/mlpack/methods/nca

fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Oct 31 17:00:22 EDT 2012


Author: rcurtin
Date: 2012-10-31 17:00:22 -0400 (Wed, 31 Oct 2012)
New Revision: 13806

Modified:
   mlpack/trunk/src/mlpack/methods/nca/nca.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
   mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
   mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
Log:
Remove the distance normalization... that is not the right way to handle the exp() underflow.


Modified: mlpack/trunk/src/mlpack/methods/nca/nca.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca.hpp	2012-10-31 20:54:03 UTC (rev 13805)
+++ mlpack/trunk/src/mlpack/methods/nca/nca.hpp	2012-10-31 21:00:22 UTC (rev 13806)
@@ -43,26 +43,19 @@
   /**
    * Construct the Neighborhood Components Analysis object.  This simply stores
    * the reference to the dataset and labels as well as the parameters for
-   * optimization before the actual optimization is performed.  In cases where
-   * points in the dataset are far apart (>700), some calculations will
-   * underflow; in this case, normalizeDistances should be set to true (it is by
-   * default).  It can be set to false for very minor speed gains.
+   * optimization before the actual optimization is performed.
    *
    * @param dataset Input dataset.
    * @param labels Input dataset labels.
    * @param stepSize Step size for stochastic gradient descent.
    * @param maxIterations Maximum iterations for stochastic gradient descent.
    * @param tolerance Tolerance for termination of stochastic gradient descent.
-   * @param normalizeDistances Whether or not distances should be normalized;
-   *     this is useful when the points in the dataset are far apart.
-   * @param metric Instantiated metric type.
    */
   NCA(const arma::mat& dataset,
       const arma::uvec& labels,
       const double stepSize = 0.01,
       const size_t maxIterations = 500000,
       const double tolerance = 1e-5,
-      const bool normalizeDistances = true,
       MetricType metric = MetricType());
 
   /**
@@ -108,8 +101,6 @@
   size_t maxIterations;
   //! Tolerance for termination of stochastic gradient descent.
   double tolerance;
-  //! Whether or not distances should be normalized in the error function.
-  bool normalizeDistances;
 };
 
 }; // namespace nca
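
For reference, a minimal usage sketch of the updated API, assuming the mlpack 1.x headers and namespaces used in this tree; the file name is a placeholder, the labels are assumed to be filled in elsewhere, and the constructor arguments mirror the defaults declared above and the call in nca_main.cpp below:

    #include <mlpack/core.hpp>
    #include <mlpack/methods/nca/nca.hpp>

    using namespace mlpack;
    using namespace mlpack::nca;
    using namespace mlpack::metric;

    int main()
    {
      arma::mat data;      // one point per column
      arma::uvec labels;   // one label per point, filled in elsewhere
      data::Load("data.csv", data);  // placeholder file name

      // Step size 0.01, up to 500000 SGD iterations, tolerance 1e-5;
      // these match the defaults in the declaration above.
      NCA<LMetric<2> > nca(data, labels, 0.01, 500000, 1e-5);

      arma::mat distance;
      nca.LearnDistance(distance);  // the learned linear transformation
      return 0;
    }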

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp	2012-10-31 20:54:03 UTC (rev 13805)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp	2012-10-31 21:00:22 UTC (rev 13806)
@@ -24,15 +24,13 @@
                      const double stepSize,
                      const size_t maxIterations,
                      const double tolerance,
-                     const bool normalizeDistances,
                      MetricType metric) :
     dataset(dataset),
     labels(labels),
     metric(metric),
     stepSize(stepSize),
     maxIterations(maxIterations),
-    tolerance(tolerance),
-    normalizeDistances(normalizeDistances)
+    tolerance(tolerance)
 { /* Nothing to do. */ }
 
 template<typename MetricType>
@@ -40,12 +38,11 @@
 {
   outputMatrix = arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
 
-  SoftmaxErrorFunction<MetricType> errorFunc(dataset, labels,
-      normalizeDistances, metric);
+  SoftmaxErrorFunction<MetricType> errorFunc(dataset, labels, metric);
 
   // We will use stochastic gradient descent to optimize the NCA error function.
   optimization::SGD<SoftmaxErrorFunction<MetricType> > sgd(errorFunc, stepSize,
-      maxIterations, tolerance, normalizeDistances);
+      maxIterations, tolerance);
 
   Timer::Start("nca_sgd_optimization");
 

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp	2012-10-31 20:54:03 UTC (rev 13805)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp	2012-10-31 21:00:22 UTC (rev 13806)
@@ -33,8 +33,6 @@
     "gradient descent (0 indicates no limit).", "n", 500000);
 PARAM_DOUBLE("tolerance", "Maximum tolerance for termination of stochastic "
     "gradient descent.", "t", 1e-7);
-PARAM_FLAG("no_normalization", "Do not normalize distances (this should not be"
-    "set if squared distances between points are greater than 700).", "N");
 
 using namespace mlpack;
 using namespace mlpack::nca;
@@ -54,7 +52,6 @@
   const double stepSize = CLI::GetParam<double>("step_size");
   const size_t maxIterations = CLI::GetParam<int>("max_iterations");
   const double tolerance = CLI::GetParam<double>("tolerance");
-  const bool normalize = !CLI::HasParam("no_normalization");
 
   // Load data.
   mat data;
@@ -82,7 +79,7 @@
 
   // Now create the NCA object and run the optimization.
   NCA<LMetric<2> > nca(data, labels.unsafe_col(0), stepSize, maxIterations,
-      tolerance, normalize);
+      tolerance);
 
   mat distance;
   nca.LearnDistance(distance);
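
With that flag gone, the command-line tool no longer accepts -N / --no_normalization. Of the options touched here, only -n (max_iterations) and -t (tolerance) are visible in the hunk; the data-related flag names in the sketch below are assumptions for illustration only:

    # hypothetical input/labels/output flag names; -n and -t as defined above
    nca --input_file data.csv --labels_file labels.csv --output_file distance.csv \
        -n 100000 -t 1e-7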

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp	2012-10-31 20:54:03 UTC (rev 13805)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp	2012-10-31 21:00:22 UTC (rev 13806)
@@ -42,9 +42,6 @@
    * store, which is set elsewhere.  If no kernel is given, an empty kernel is
    * used; this way, you can call the constructor with no arguments.  A
    * reference to the dataset we will be optimizing over is also required.
-   * Optionally, the distances between points can be normalized, to prevent
-   * underflows.  If all of the points in your dataset are relatively close
-   * (distances less than 700 or so), underflow will not occur.
    *
    * @param dataset Matrix containing the dataset.
    * @param labels Vector of class labels for each point in the dataset.
@@ -52,7 +49,6 @@
    */
   SoftmaxErrorFunction(const arma::mat& dataset,
                        const arma::uvec& labels,
-                       const bool normalizeDistances = true,
                        MetricType metric = MetricType());
 
   /**
@@ -113,19 +109,11 @@
   size_t NumFunctions() const { return dataset.n_cols; }
 
  private:
-  //! Reference to the dataset.
   const arma::mat& dataset;
-  //! Reference to the labels for the dataset.
   const arma::uvec& labels;
 
-  //! The instantiated metric to use.
   MetricType metric;
 
-  //! Whether or not to normalize the distances to 1.
-  bool normalizeDistances;
-  //! The normalization constant, used if normalizeDistances == true.
-  double normalizationConstant;
-
   //! Last coordinates.  Used for the non-separable Evaluate() and Gradient().
   arma::mat lastCoordinates;
   //! Stretched dataset.  Kept internal to avoid memory reallocations.
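
For context, the quantity this class implements is the standard NCA softmax objective (Goldberger et al., 2004), written here with d(., .) standing for the instantiated MetricType evaluated on the stretched points A x_i:

    p_{ik} = \frac{\exp(-d(A x_i, A x_k))}{\sum_{j \neq i} \exp(-d(A x_i, A x_j))}, \qquad p_{ii} = 0,
    f(A)   = \sum_i p_i, \qquad p_i = \sum_{k : y_k = y_i} p_{ik}.

Being a sum over points, f decomposes as f(A) = \sum_i f_i(A); that separable form is what NumFunctions() and the Evaluate()/Gradient() overloads taking an index i expose, so SGD can step through one point at a time (as the name "error function" suggests, what the optimizer minimizes is the negation of f).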

Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp	2012-10-31 20:54:03 UTC (rev 13805)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp	2012-10-31 21:00:22 UTC (rev 13806)
@@ -15,19 +15,14 @@
 
 // Initialize with the given kernel.
 template<typename MetricType>
-SoftmaxErrorFunction<MetricType>::SoftmaxErrorFunction(
-    const arma::mat& dataset,
-    const arma::uvec& labels,
-    const bool normalizeDistances,
-    MetricType metric) :
-    dataset(dataset),
-    labels(labels),
-    metric(metric),
-    normalizeDistances(normalizeDistances),
-    normalizationConstant(100.0), // Just start at some number.
-    precalculated(false)
-{
-}
+SoftmaxErrorFunction<MetricType>::SoftmaxErrorFunction(const arma::mat& dataset,
+                                                       const arma::uvec& labels,
+                                                       MetricType metric) :
+  dataset(dataset),
+  labels(labels),
+  metric(metric),
+  precalculated(false)
+{ /* nothing to do */ }
 
 //! The non-separable implementation, which uses Precalculate() to save time.
 template<typename MetricType>
@@ -60,26 +55,9 @@
       continue;
 
     // We want to evaluate exp(-D(A x_i, A x_k)).
-    double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                  stretchedDataset.unsafe_col(k));
+    double eval = std::exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                            stretchedDataset.unsafe_col(k)));
 
-    // exp(-750) underflows.  So let's take action if the distance goes over
-    // 700.
-    if (normalizeDistances && ((dist / normalizationConstant) > 700))
-    {
-      Log::Warn << "Normalized distance between points " << i << " and " << k
-          << " is greater than limit of 700 (" << (dist / normalizationConstant)
-          << ").  Renormalizing..." << std::endl;
-      normalizationConstant = (1.5 * dist);
-
-      // We must re-call Evaluate() recursively since the normalization has
-      // changed.  This may do weird things to the objective function, but it
-      // should still optimize okay.
-      return Evaluate(coordinates, i);
-    }
-
-    double eval = std::exp(-dist);
-
     // If they are in the same class, add to the numerator.
     if (labels[i] == labels[k])
       numerator += eval;
@@ -126,27 +104,8 @@
     for (size_t k = (i + 1); k < stretchedDataset.n_cols; k++)
     {
       // Calculate p_ik and p_ki first.
-      double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                    stretchedDataset.unsafe_col(k));
-
-      // exp(-750) underflows.  So let's take action if the distance goes over
-      // 700.
-      if (normalizeDistances && ((dist / normalizationConstant) > 700))
-      {
-        Log::Warn << "Normalized distance between points " << i << " and " << k
-            << " is greater than limit of 700 ("
-            << (dist / normalizationConstant) << ").  Renormalizing...\n";
-        normalizationConstant = (1.5 * dist);
-
-        // We must re-call Gradient() recursively since the normalization has
-        // changed.  This may do weird things to the objective function, but it
-        // should still optimize okay.
-        Gradient(coordinates, gradient);
-        return;
-      }
-
-      double eval = exp(-dist);
-
+      double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                         stretchedDataset.unsafe_col(k)));
       double p_ik = 0, p_ki = 0;
       p_ik = eval / denominators(i);
       p_ki = eval / denominators(k);
@@ -192,27 +151,9 @@
       continue;
 
     // Calculate p_ik.
-    double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                  stretchedDataset.unsafe_col(k));
+    double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                       stretchedDataset.unsafe_col(k)));
 
-    // exp(-750) underflows.  So let's take action if the distance goes over
-    // 700.
-    if (normalizeDistances && ((dist / normalizationConstant) > 700))
-    {
-      Log::Warn << "Normalized distance between points " << i << " and " << k
-          << " is greater than limit of 700 (" << (dist / normalizationConstant)
-          << ").  Renormalizing..." << std::endl;
-      normalizationConstant = (1.5 * dist);
-
-      // We must re-call Gradient() recursively since the normalization has
-      // changed.  This may do weird things to the objective function, but it
-      // should still optimize okay.
-      Gradient(coordinates, i, gradient);
-      return;
-    }
-
-    double eval = exp(-dist);
-
     // If the points are in the same class, we must add to the second term of
     // the gradient as well as the numerator of p_i.
     if (labels[i] == labels[k])
@@ -257,8 +198,8 @@
   lastCoordinates.set_size(coordinates.n_rows, coordinates.n_cols);
 
   // Make sure the calculation is necessary.
-  if (precalculated &&
-      (accu(coordinates == lastCoordinates) == coordinates.n_elem))
+  if ((accu(coordinates == lastCoordinates) == coordinates.n_elem) &&
+      precalculated)
     return; // No need to calculate; we already have this stuff saved.
 
   // Coordinates are different; save the new ones, and stretch the dataset.
@@ -278,28 +219,9 @@
     for (size_t j = (i + 1); j < stretchedDataset.n_cols; j++)
     {
       // Evaluate exp(-d(x_i, x_j)).
-      double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
-                                    stretchedDataset.unsafe_col(j));
+      double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+                                         stretchedDataset.unsafe_col(j)));
 
-      // exp(-750) underflows.  So let's take action if the distance goes over
-      // 700.
-      if (normalizeDistances && ((dist / normalizationConstant) > 700))
-      {
-        Log::Warn << "Normalized distance between points " << i << " and " << j
-            << " is greater than limit of 700 ("
-            << (dist / normalizationConstant) << ").  Renormalizing...\n";
-        normalizationConstant = (1.5 * dist);
-
-        // We must re-call Gradient() recursively since the normalization has
-        // changed.  This may do weird things to the objective function, but it
-        // should still optimize okay.
-        precalculated = false;
-        Precalculate(coordinates);
-        return;
-      }
-
-      double eval = std::exp(-dist);
-
       // Add this to the denominators of both i and j: p_ij = p_ji.
       denominators[i] += eval;
       denominators[j] += eval;
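
For anyone wondering what the deleted warnings guarded against: with IEEE-754 doubles, exp(-x) becomes subnormal around x = 708 and is exactly zero once x passes roughly 745, so exp(-dist) silently collapses to 0 for far-apart points; the old code rescaled distances by normalizationConstant to dodge that. A standalone illustration (not mlpack code, just a sketch):

    #include <cmath>
    #include <iostream>

    int main()
    {
      // Near -708 the result turns subnormal; past roughly -745 it is
      // exactly zero, which is why the removed code warned above 700.
      std::cout << std::exp(-700.0) << "\n";  // about 9.86e-305, still a normal double
      std::cout << std::exp(-750.0) << "\n";  // exactly 0
      return 0;
    }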



