[mlpack-svn] r13808 - mlpack/trunk/src/mlpack/methods/nca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Oct 31 17:27:43 EDT 2012
Author: rcurtin
Date: 2012-10-31 17:27:41 -0400 (Wed, 31 Oct 2012)
New Revision: 13808
Modified:
mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
Log:
Slightly smarter normalization strategy for large datasets.
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp 2012-10-31 21:19:56 UTC (rev 13807)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp 2012-10-31 21:27:41 UTC (rev 13808)
@@ -33,6 +33,8 @@
"gradient descent (0 indicates no limit).", "n", 500000);
PARAM_DOUBLE("tolerance", "Maximum tolerance for termination of stochastic "
"gradient descent.", "t", 1e-7);
+PARAM_FLAG("normalize", "Normalize data; useful for datasets where points are "
+ "far apart, or when SGD is converging to an objective of NaN.", "N");
using namespace mlpack;
using namespace mlpack::nca;
@@ -52,6 +54,7 @@
const double stepSize = CLI::GetParam<double>("step_size");
const size_t maxIterations = CLI::GetParam<int>("max_iterations");
const double tolerance = CLI::GetParam<double>("tolerance");
+ const bool normalize = CLI::HasParam("normalize");
// Load data.
mat data;
@@ -77,6 +80,25 @@
data.shed_row(data.n_rows - 1);
}
+ // Normalize the data, if necessary.
+ if (normalize)
+ {
+ // Find the minimum and maximum values for each dimension.
+ arma::vec range = arma::max(data, 1) - arma::min(data, 1);
+
+ // Now find the maximum range.
+ double maxRange = arma::max(range);
+
+ // We can place a (lazy) upper bound on the distance with range^2 * d.
+ // Since we want no distance greater than 700 (because std::exp(-750)
+ // underflows), we can normalize with (range^2 * d) / 700.
+ double normalization = (std::pow(maxRange, 2.0) * data.n_rows) / 700.0;
+ data /= normalization; // Element-wise division.
+
+ Log::Info << "Data normalized (normalization constant " << normalization
+ << ")." << std::endl;
+ }
+
// Now create the NCA object and run the optimization.
NCA<LMetric<2> > nca(data, labels.unsafe_col(0), stepSize, maxIterations,
tolerance);
More information about the mlpack-svn
mailing list