[mlpack-svn] r13824 - mlpack/trunk/src/mlpack/methods/nca

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Nov 1 17:50:27 EDT 2012


Author: rcurtin
Date: 2012-11-01 17:50:27 -0400 (Thu, 01 Nov 2012)
New Revision: 13824

Modified:
   mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
Log:
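Change the way things are normalized: rather than rescaling the data itself,
start the optimization from a diagonal distance matrix built from the inverse
of each dimension's range (the identity matrix is used when not normalizing).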


Modified: mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp	2012-11-01 21:34:11 UTC (rev 13823)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp	2012-11-01 21:50:27 UTC (rev 13824)
@@ -33,8 +33,9 @@
     "gradient descent (0 indicates no limit).", "n", 500000);
 PARAM_DOUBLE("tolerance", "Maximum tolerance for termination of stochastic "
     "gradient descent.", "t", 1e-7);
-PARAM_FLAG("normalize", "Normalize data; useful for datasets where points are "
-    "far apart, or when SGD is converging to an objective of NaN.", "N");
+PARAM_FLAG("normalize", "Use a normalized starting point for optimization. This"
+    " is useful for when points are far apart, or when SGD is returning NaN.",
+    "N");
 PARAM_INT("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
 PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are "
     "visited for SGD.", "L");
@@ -89,30 +90,24 @@
     data.shed_row(data.n_rows - 1);
   }
 
+  mat distance;
+
   // Normalize the data, if necessary.
   if (normalize)
   {
     // Find the minimum and maximum values for each dimension.
-    arma::vec range = arma::max(data, 1) - arma::min(data, 1);
-
-    // Now find the maximum range.
-    double maxRange = arma::max(range);
-
-    // We can place a (lazy) upper bound on the distance with range^2 * d.
-    // Since we want no distance greater than 700 (because std::exp(-750)
-    // underflows), we can normalize with (range^2 * d) / 700).
-    double normalization = (std::pow(maxRange, 2.0) * data.n_rows) / 700.0;
-    data /= normalization; // Element-wise division.
-
-    Log::Info << "Data normalized (normalization constant " << normalization
-        << ")." << std::endl;
+    distance = diagmat(1.0 / (arma::max(data, 1) - arma::min(data, 1)));
+    Log::Info << "Using normalized starting point for SGD." << std::endl;
   }
+  else
+  {
+    distance.eye();
+  }
 
   // Now create the NCA object and run the optimization.
   NCA<LMetric<2> > nca(data, labels.unsafe_col(0), stepSize, maxIterations,
       tolerance, shuffle);
 
-  mat distance;
   nca.LearnDistance(distance);
 
   Log::Warn << trans(distance);




More information about the mlpack-svn mailing list