[mlpack-svn] r13804 - mlpack/trunk/src/mlpack/methods/nca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Oct 31 16:50:28 EDT 2012
Author: rcurtin
Date: 2012-10-31 16:50:28 -0400 (Wed, 31 Oct 2012)
New Revision: 13804
Modified:
mlpack/trunk/src/mlpack/methods/nca/nca.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
Log:
Allow normalization of points, to prevent underflows.
Modified: mlpack/trunk/src/mlpack/methods/nca/nca.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca.hpp 2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca.hpp 2012-10-31 20:50:28 UTC (rev 13804)
@@ -43,19 +43,26 @@
/**
* Construct the Neighborhood Components Analysis object. This simply stores
* the reference to the dataset and labels as well as the parameters for
- * optimization before the actual optimization is performed.
+ * optimization before the actual optimization is performed. In cases where
+ * points in the dataset are far apart (>700), some calculations will
+ * underflow; in this case, normalizeDistances should be set to true (it is by
+ * default). It can be set to false for very minor speed gains.
*
* @param dataset Input dataset.
* @param labels Input dataset labels.
* @param stepSize Step size for stochastic gradient descent.
* @param maxIterations Maximum iterations for stochastic gradient descent.
* @param tolerance Tolerance for termination of stochastic gradient descent.
+ * @param normalizeDistances Whether or not distances should be normalized;
+ * this is useful when the points in the dataset are far apart.
+ * @param metric Instantiated metric type.
*/
NCA(const arma::mat& dataset,
const arma::uvec& labels,
const double stepSize = 0.01,
const size_t maxIterations = 500000,
const double tolerance = 1e-5,
+ const bool normalizeDistances = true,
MetricType metric = MetricType());
/**
@@ -101,6 +108,8 @@
size_t maxIterations;
//! Tolerance for termination of stochastic gradient descent.
double tolerance;
+ //! Whether or not distances should be normalized in the error function.
+ bool normalizeDistances;
};
}; // namespace nca
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp 2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp 2012-10-31 20:50:28 UTC (rev 13804)
@@ -24,13 +24,15 @@
const double stepSize,
const size_t maxIterations,
const double tolerance,
+ const bool normalizeDistances,
MetricType metric) :
dataset(dataset),
labels(labels),
metric(metric),
stepSize(stepSize),
maxIterations(maxIterations),
- tolerance(tolerance)
+ tolerance(tolerance),
+ normalizeDistances(normalizeDistances)
{ /* Nothing to do. */ }
template<typename MetricType>
@@ -38,11 +40,12 @@
{
outputMatrix = arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
- SoftmaxErrorFunction<MetricType> errorFunc(dataset, labels, metric);
+ SoftmaxErrorFunction<MetricType> errorFunc(dataset, labels,
+ normalizeDistances, metric);
// We will use stochastic gradient descent to optimize the NCA error function.
optimization::SGD<SoftmaxErrorFunction<MetricType> > sgd(errorFunc, stepSize,
- maxIterations, tolerance);
+ maxIterations, tolerance, normalizeDistances);
Timer::Start("nca_sgd_optimization");
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp 2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp 2012-10-31 20:50:28 UTC (rev 13804)
@@ -33,6 +33,8 @@
"gradient descent (0 indicates no limit).", "n", 500000);
PARAM_DOUBLE("tolerance", "Maximum tolerance for termination of stochastic "
"gradient descent.", "t", 1e-7);
+PARAM_FLAG("no_normalization", "Do not normalize distances (this should not be"
+ "set if squared distances between points are greater than 700).", "N");
using namespace mlpack;
using namespace mlpack::nca;
@@ -52,6 +54,7 @@
const double stepSize = CLI::GetParam<double>("step_size");
const size_t maxIterations = CLI::GetParam<int>("max_iterations");
const double tolerance = CLI::GetParam<double>("tolerance");
+ const bool normalize = !CLI::HasParam("no_normalization");
// Load data.
mat data;
@@ -79,7 +82,7 @@
// Now create the NCA object and run the optimization.
NCA<LMetric<2> > nca(data, labels.unsafe_col(0), stepSize, maxIterations,
- tolerance);
+ tolerance, normalize);
mat distance;
nca.LearnDistance(distance);
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp 2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp 2012-10-31 20:50:28 UTC (rev 13804)
@@ -42,6 +42,9 @@
* store, which is set elsewhere. If no kernel is given, an empty kernel is
* used; this way, you can call the constructor with no arguments. A
* reference to the dataset we will be optimizing over is also required.
+ * Optionally, the distances between points can be normalized, to prevent
+ * underflows. If all of the points in your dataset are relatively close
+ * (distances less than 700 or so), underflow will not occur.
*
* @param dataset Matrix containing the dataset.
* @param labels Vector of class labels for each point in the dataset.
@@ -49,6 +52,7 @@
*/
SoftmaxErrorFunction(const arma::mat& dataset,
const arma::uvec& labels,
+ const bool normalizeDistances = true,
MetricType metric = MetricType());
/**
@@ -109,11 +113,19 @@
size_t NumFunctions() const { return dataset.n_cols; }
private:
+ //! Reference to the dataset.
const arma::mat& dataset;
+ //! Reference to the labels for the dataset.
const arma::uvec& labels;
+ //! The instantiated metric to use.
MetricType metric;
+ //! Whether or not to normalize the distances to 1.
+ bool normalizeDistances;
+ //! The normalization constant, used if normalizeDistances == true.
+ double normalizationConstant;
+
//! Last coordinates. Used for the non-separable Evaluate() and Gradient().
arma::mat lastCoordinates;
//! Stretched dataset. Kept internal to avoid memory reallocations.
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp 2012-10-31 20:19:22 UTC (rev 13803)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp 2012-10-31 20:50:28 UTC (rev 13804)
@@ -15,14 +15,19 @@
// Initialize with the given kernel.
template<typename MetricType>
-SoftmaxErrorFunction<MetricType>::SoftmaxErrorFunction(const arma::mat& dataset,
- const arma::uvec& labels,
- MetricType metric) :
- dataset(dataset),
- labels(labels),
- metric(metric),
- precalculated(false)
-{ /* nothing to do */ }
+SoftmaxErrorFunction<MetricType>::SoftmaxErrorFunction(
+ const arma::mat& dataset,
+ const arma::uvec& labels,
+ const bool normalizeDistances,
+ MetricType metric) :
+ dataset(dataset),
+ labels(labels),
+ metric(metric),
+ normalizeDistances(normalizeDistances),
+ normalizationConstant(100.0), // Just start at some number.
+ precalculated(false)
+{
+}
//! The non-separable implementation, which uses Precalculate() to save time.
template<typename MetricType>
@@ -55,9 +60,26 @@
continue;
// We want to evaluate exp(-D(A x_i, A x_k)).
- double eval = std::exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
- stretchedDataset.unsafe_col(k)));
+ double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
+ stretchedDataset.unsafe_col(k));
+ // exp(-750) underflows. So let's take action if the distance goes over
+ // 700.
+ if (normalizeDistances && ((dist / normalizationConstant) > 700))
+ {
+ Log::Warn << "Normalized distance between points " << i << " and " << k
+ << " is greater than limit of 700 (" << (dist / normalizationConstant)
+ << "). Renormalizing..." << std::endl;
+ normalizationConstant = (1.5 * dist);
+
+ // We must re-call Evaluate() recursively since the normalization has
+ // changed. This may do weird things to the objective function, but it
+ // should still optimize okay.
+ return Evaluate(coordinates, i);
+ }
+
+ double eval = std::exp(-dist);
+
// If they are in the same
if (labels[i] == labels[k])
numerator += eval;
@@ -104,8 +126,27 @@
for (size_t k = (i + 1); k < stretchedDataset.n_cols; k++)
{
// Calculate p_ik and p_ki first.
- double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
- stretchedDataset.unsafe_col(k)));
+ double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
+ stretchedDataset.unsafe_col(k));
+
+ // exp(-750) underflows. So let's take action if the distance goes over
+ // 700.
+ if (normalizeDistances && ((dist / normalizationConstant) > 700))
+ {
+ Log::Warn << "Normalized distance between points " << i << " and " << k
+ << " is greater than limit of 700 ("
+ << (dist / normalizationConstant) << "). Renormalizing...\n";
+ normalizationConstant = (1.5 * dist);
+
+ // We must re-call Gradient() recursively since the normalization has
+ // changed. This may do weird things to the objective function, but it
+ // should still optimize okay.
+ Gradient(coordinates, gradient);
+ return;
+ }
+
+ double eval = exp(-dist);
+
double p_ik = 0, p_ki = 0;
p_ik = eval / denominators(i);
p_ki = eval / denominators(k);
@@ -151,9 +192,27 @@
continue;
// Calculate p_ik.
- double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
- stretchedDataset.unsafe_col(k)));
+ double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
+ stretchedDataset.unsafe_col(k));
+ // exp(-750) underflows. So let's take action if the distance goes over
+ // 700.
+ if (normalizeDistances && ((dist / normalizationConstant) > 700))
+ {
+ Log::Warn << "Normalized distance between points " << i << " and " << k
+ << " is greater than limit of 700 (" << (dist / normalizationConstant)
+ << "). Renormalizing..." << std::endl;
+ normalizationConstant = (1.5 * dist);
+
+ // We must re-call Gradient() recursively since the normalization has
+ // changed. This may do weird things to the objective function, but it
+ // should still optimize okay.
+ Gradient(coordinates, i, gradient);
+ return;
+ }
+
+ double eval = exp(-dist);
+
// If the points are in the same class, we must add to the second term of
// the gradient as well as the numerator of p_i.
if (labels[i] == labels[k])
@@ -198,8 +257,8 @@
lastCoordinates.set_size(coordinates.n_rows, coordinates.n_cols);
// Make sure the calculation is necessary.
- if ((accu(coordinates == lastCoordinates) == coordinates.n_elem) &&
- precalculated)
+ if (precalculated &&
+ (accu(coordinates == lastCoordinates) == coordinates.n_elem))
return; // No need to calculate; we already have this stuff saved.
// Coordinates are different; save the new ones, and stretch the dataset.
@@ -219,9 +278,28 @@
for (size_t j = (i + 1); j < stretchedDataset.n_cols; j++)
{
// Evaluate exp(-d(x_i, x_j)).
- double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
- stretchedDataset.unsafe_col(j)));
+ double dist = metric.Evaluate(stretchedDataset.unsafe_col(i),
+ stretchedDataset.unsafe_col(j));
+ // exp(-750) underflows. So let's take action if the distance goes over
+ // 700.
+ if (normalizeDistances && ((dist / normalizationConstant) > 700))
+ {
+ Log::Warn << "Normalized distance between points " << i << " and " << j
+ << " is greater than limit of 700 ("
+ << (dist / normalizationConstant) << "). Renormalizing...\n";
+ normalizationConstant = (1.5 * dist);
+
+ // We must re-call Gradient() recursively since the normalization has
+ // changed. This may do weird things to the objective function, but it
+ // should still optimize okay.
+ precalculated = false;
+ Precalculate(coordinates);
+ return;
+ }
+
+ double eval = std::exp(-dist);
+
// Add this to the denominators of both i and j: p_ij = p_ji.
denominators[i] += eval;
denominators[j] += eval;
More information about the mlpack-svn
mailing list