[mlpack-svn] r13824 - mlpack/trunk/src/mlpack/methods/nca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Nov 1 17:50:27 EDT 2012
Author: rcurtin
Date: 2012-11-01 17:50:27 -0400 (Thu, 01 Nov 2012)
New Revision: 13824
Modified:
mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
Log:
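Change the way things are normalized: instead of rescaling the data itself, the 'normalize' flag now makes the optimization start from a diagonal matrix of inverse per-dimension ranges (the identity matrix is used otherwise).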
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp 2012-11-01 21:34:11 UTC (rev 13823)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp 2012-11-01 21:50:27 UTC (rev 13824)
@@ -33,8 +33,9 @@
"gradient descent (0 indicates no limit).", "n", 500000);
PARAM_DOUBLE("tolerance", "Maximum tolerance for termination of stochastic "
"gradient descent.", "t", 1e-7);
-PARAM_FLAG("normalize", "Normalize data; useful for datasets where points are "
- "far apart, or when SGD is converging to an objective of NaN.", "N");
+PARAM_FLAG("normalize", "Use a normalized starting point for optimization. This"
+ " is useful for when points are far apart, or when SGD is returning NaN.",
+ "N");
PARAM_INT("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0);
PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are "
"visited for SGD.", "L");
@@ -89,30 +90,24 @@
data.shed_row(data.n_rows - 1);
}
+ mat distance;
+
// Normalize the data, if necessary.
if (normalize)
{
// Find the minimum and maximum values for each dimension.
- arma::vec range = arma::max(data, 1) - arma::min(data, 1);
-
- // Now find the maximum range.
- double maxRange = arma::max(range);
-
- // We can place a (lazy) upper bound on the distance with range^2 * d.
- // Since we want no distance greater than 700 (because std::exp(-750)
- // underflows), we can normalize with (range^2 * d) / 700).
- double normalization = (std::pow(maxRange, 2.0) * data.n_rows) / 700.0;
- data /= normalization; // Element-wise division.
-
- Log::Info << "Data normalized (normalization constant " << normalization
- << ")." << std::endl;
+ distance = diagmat(1.0 / (arma::max(data, 1) - arma::min(data, 1)));
+ Log::Info << "Using normalized starting point for SGD." << std::endl;
}
+ else
+ {
+ distance.eye();
+ }
// Now create the NCA object and run the optimization.
NCA<LMetric<2> > nca(data, labels.unsafe_col(0), stepSize, maxIterations,
tolerance, shuffle);
- mat distance;
nca.LearnDistance(distance);
Log::Warn << trans(distance);
More information about the mlpack-svn mailing list