[mlpack-svn] r13802 - mlpack/trunk/src/mlpack/methods/nca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Oct 31 15:54:06 EDT 2012
Author: rcurtin
Date: 2012-10-31 15:54:06 -0400 (Wed, 31 Oct 2012)
New Revision: 13802
Modified:
mlpack/trunk/src/mlpack/methods/nca/nca.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
Log:
Implement decomposable error function and gradient, then make NCA use SGD and
not L-BFGS. That's a lot of acronyms... for now, parameters to SGD are
specifiable through the NCA interface, but I think in the long run NCA should be
templatized to use an arbitrary optimizer.
Modified: mlpack/trunk/src/mlpack/methods/nca/nca.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca.hpp 2012-10-31 19:18:53 UTC (rev 13801)
+++ mlpack/trunk/src/mlpack/methods/nca/nca.hpp 2012-10-31 19:54:06 UTC (rev 13802)
@@ -36,17 +36,27 @@
* }
* @endcode
*/
-template<typename Kernel>
+template<typename MetricType>
class NCA
{
public:
/**
* Construct the Neighborhood Components Analysis object. This simply stores
- * the reference to the dataset, before the actual optimization is performed.
+ * the reference to the dataset and labels as well as the parameters for
+ * optimization before the actual optimization is performed.
*
* @param dataset Input dataset.
+ * @param labels Input dataset labels.
+ * @param stepSize Step size for stochastic gradient descent.
+ * @param maxIterations Maximum iterations for stochastic gradient descent.
+ * @param tolerance Tolerance for termination of stochastic gradient descent.
*/
- NCA(const arma::mat& dataset, const arma::uvec& labels);
+ NCA(const arma::mat& dataset,
+ const arma::uvec& labels,
+ const double stepSize = 0.01,
+ const size_t maxIterations = 500000,
+ const double tolerance = 1e-5,
+ MetricType metric = MetricType());
/**
* Perform Neighborhood Components Analysis. The output distance learning
@@ -56,9 +66,41 @@
*/
void LearnDistance(arma::mat& outputMatrix);
+ //! Get the dataset reference.
+ const arma::mat& Dataset() const { return dataset; }
+ //! Get the labels reference.
+ const arma::uvec& Labels() const { return labels; }
+
+ //! Get the step size for stochastic gradient descent.
+ double StepSize() const { return stepSize; }
+ //! Modify the step size for stochastic gradient descent.
+ double& StepSize() { return stepSize; }
+
+ //! Get the maximum number of iterations for stochastic gradient descent.
+ size_t MaxIterations() const { return maxIterations; }
+ //! Modify the maximum number of iterations for stochastic gradient descent.
+ size_t& MaxIterations() { return maxIterations; }
+
+ //! Get the tolerance for the termination of stochastic gradient descent.
+ double Tolerance() const { return tolerance; }
+ //! Modify the tolerance for the termination of stochastic gradient descent.
+ double& Tolerance() { return tolerance; }
+
private:
+ //! Dataset reference.
const arma::mat& dataset;
+ //! Labels reference.
const arma::uvec& labels;
+
+ //! Metric to be used.
+ MetricType metric;
+
+ //! Step size for stochastic gradient descent.
+ double stepSize;
+ //! Maximum iterations for stochastic gradient descent.
+ size_t maxIterations;
+ //! Tolerance for termination of stochastic gradient descent.
+ double tolerance;
};
}; // namespace nca
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp 2012-10-31 19:18:53 UTC (rev 13801)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp 2012-10-31 19:54:06 UTC (rev 13802)
@@ -10,7 +10,7 @@
// In case it was not already included.
#include "nca.hpp"
-#include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>
+#include <mlpack/core/optimizers/sgd/sgd.hpp>
#include "nca_softmax_error_function.hpp"
@@ -18,25 +18,37 @@
namespace nca {
// Just set the internal matrix reference.
-template<typename Kernel>
-NCA<Kernel>::NCA(const arma::mat& dataset, const arma::uvec& labels) :
- dataset(dataset), labels(labels) { /* nothing to do */ }
+template<typename MetricType>
+NCA<MetricType>::NCA(const arma::mat& dataset,
+ const arma::uvec& labels,
+ const double stepSize,
+ const size_t maxIterations,
+ const double tolerance,
+ MetricType metric) :
+ dataset(dataset),
+ labels(labels),
+ metric(metric),
+ stepSize(stepSize),
+ maxIterations(maxIterations),
+ tolerance(tolerance)
+{ /* Nothing to do. */ }
-template<typename Kernel>
-void NCA<Kernel>::LearnDistance(arma::mat& outputMatrix)
+template<typename MetricType>
+void NCA<MetricType>::LearnDistance(arma::mat& outputMatrix)
{
outputMatrix = arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
- SoftmaxErrorFunction<Kernel> errorFunc(dataset, labels);
+ SoftmaxErrorFunction<MetricType> errorFunc(dataset, labels, metric);
- // We will use the L-BFGS optimizer to optimize the stretching matrix.
- optimization::L_BFGS<SoftmaxErrorFunction<Kernel> > lbfgs(errorFunc, 10);
+ // We will use stochastic gradient descent to optimize the NCA error function.
+ optimization::SGD<SoftmaxErrorFunction<MetricType> > sgd(errorFunc, stepSize,
+ maxIterations, tolerance);
- Timer::Start("nca_lbfgs_optimization");
+ Timer::Start("nca_sgd_optimization");
- lbfgs.Optimize(outputMatrix);
+ sgd.Optimize(outputMatrix);
- Timer::Stop("nca_lbfgs_optimization");
+ Timer::Stop("nca_sgd_optimization");
}
}; // namespace nca
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp 2012-10-31 19:18:53 UTC (rev 13801)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp 2012-10-31 19:54:06 UTC (rev 13802)
@@ -61,6 +61,18 @@
double Evaluate(const arma::mat& covariance);
/**
+ * Evaluate the softmax objective function for the given covariance matrix on
+ * only one point of the dataset. This is the separable implementation, where
+ * the objective function is decomposed into the sum of many objective
+ * functions, and here, only one of those constituent objective functions is
+ * returned.
+ *
+ * @param covariance Covariance matrix of Mahalanobis distance.
+ * @param i Index of point to use for objective function.
+ */
+ double Evaluate(const arma::mat& covariance, const size_t i);
+
+ /**
* Evaluate the gradient of the softmax function for the given covariance
* matrix. This is the non-separable implementation, where the objective
* function is not decomposed into the sum of several objective functions.
@@ -71,10 +83,31 @@
void Gradient(const arma::mat& covariance, arma::mat& gradient);
/**
+ * Evaluate the gradient of the softmax function for the given covariance
+ * matrix on only one point of the dataset. This is the separable
+ * implementation, where the objective function is decomposed into the sum of
+ * many objective functions, and here, only one of those constituent objective
+ * functions is returned.
+ *
+ * @param covariance Covariance matrix of Mahalanobis distance.
+ * @param i Index of point to use for objective function.
+ * @param gradient Matrix to store the calculated gradient in.
+ */
+ void Gradient(const arma::mat& covariance,
+ const size_t i,
+ arma::mat& gradient);
+
+ /**
* Get the initial point.
*/
const arma::mat GetInitialPoint() const;
+ /**
+ * Get the number of functions the objective function can be decomposed into.
+ * This is just the number of points in the dataset.
+ */
+ size_t NumFunctions() const { return dataset.n_cols; }
+
private:
const arma::mat& dataset;
const arma::uvec& labels;
@@ -83,7 +116,7 @@
//! Last coordinates. Used for the non-separable Evaluate() and Gradient().
arma::mat lastCoordinates;
- //! Stretched dataset. Used for the non-separable Evaluate() and Gradient().
+ //! Stretched dataset. Kept internal to avoid memory reallocations.
arma::mat stretchedDataset;
//! Holds calculated p_i, for the non-separable Evaluate() and Gradient().
arma::vec p;
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp 2012-10-31 19:18:53 UTC (rev 13801)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp 2012-10-31 19:54:06 UTC (rev 13802)
@@ -24,6 +24,7 @@
precalculated(false)
{ /* nothing to do */ }
+//! The non-separable implementation, which uses Precalculate() to save time.
template<typename MetricType>
double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates)
{
@@ -31,10 +32,53 @@
Precalculate(coordinates);
return -accu(p); // Sum of p_i for all i. We negate because our solver
- // minimizes, not maximizes.
+ // minimizes, not maximizes.
};
+//! The separated objective function, which does not use Precalculate().
template<typename MetricType>
+double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates,
+ const size_t i)
+{
+ // Unfortunately each evaluation will take O(N) time because it requires a
+ // scan over all points in the dataset. Our objective is to compute p_ij.
+ double denominator = 0;
+ double numerator = 0;
+
+ // It's quicker to do this now than one point at a time later.
+ stretchedDataset = coordinates * dataset;
+
+ for (size_t k = 0; k < dataset.n_cols; ++k)
+ {
+ // Don't consider the case where the points are the same.
+ if (k == i)
+ continue;
+
+ // We want to evaluate exp(-D(A x_i, A x_k)).
+ double eval = std::exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+ stretchedDataset.unsafe_col(k)));
+
+ // If they are in the same
+ if (labels[i] == labels[k])
+ numerator += eval;
+
+ denominator += eval;
+ }
+
+ // Now the result is just a simple division, but we have to be sure that the
+ // denominator is not 0.
+ if (denominator == 0.0)
+ {
+ Log::Warn << "Denominator of p_" << i << " is 0!" << std::endl;
+ return 0;
+ }
+ else
+ return -(numerator / denominator); // Negate because the optimizer is a
+ // minimizer.
+}
+
+//! The non-separable implementation, where Precalculate() is used.
+template<typename MetricType>
void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
arma::mat& gradient)
{
@@ -81,7 +125,66 @@
gradient = -2 * coordinates * sum;
}
+//! The separable implementation.
template<typename MetricType>
+void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
+ const size_t i,
+ arma::mat& gradient)
+{
+ // We will need to calculate p_i before this evaluation is done, so these two
+ // variables will hold the information necessary for that.
+ double numerator = 0;
+ double denominator = 0;
+
+ // The gradient involves two matrix terms which are eventually combined into
+ // one.
+ arma::mat firstTerm(coordinates.n_rows, coordinates.n_cols);
+ arma::mat secondTerm(coordinates.n_rows, coordinates.n_cols);
+
+ // Compute the stretched dataset.
+ stretchedDataset = coordinates * dataset;
+
+ for (size_t k = 0; k < dataset.n_cols; ++k)
+ {
+ // Don't consider the case where the points are the same.
+ if (i == k)
+ continue;
+
+ // Calculate p_ik.
+ double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+ stretchedDataset.unsafe_col(k)));
+
+ // If the points are in the same class, we must add to the second term of
+ // the gradient as well as the numerator of p_i.
+ if (labels[i] == labels[k])
+ {
+ numerator += eval;
+ secondTerm += eval *
+ (stretchedDataset.col(i) - stretchedDataset.col(k)) *
+ arma::trans(stretchedDataset.col(i) - stretchedDataset.col(k));
+ }
+
+ // We always have to add to the denominator of p_i and the first term of the
+ // gradient computation.
+ denominator += eval;
+ firstTerm += eval *
+ (stretchedDataset.col(i) - stretchedDataset.col(k)) *
+ arma::trans(stretchedDataset.col(i) - stretchedDataset.col(k));
+ }
+
+ // Calculate p_i.
+ double p = 0;
+ if (denominator == 0)
+ Log::Warn << "Denominator of p_" << i << " is 0!" << std::endl;
+ else
+ p = numerator / denominator;
+
+ // Now multiply the first term by p_i, and add the two together and multiply
+ // all by 2 * A. We negate it though, because our optimizer is a minimizer.
+ gradient = -2 * coordinates * (p * firstTerm - secondTerm);
+}
+
+template<typename MetricType>
const arma::mat SoftmaxErrorFunction<MetricType>::GetInitialPoint() const
{
return arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
@@ -117,7 +220,7 @@
{
// Evaluate exp(-d(x_i, x_j)).
double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
- stretchedDataset.unsafe_col(j)));
+ stretchedDataset.unsafe_col(j)));
// Add this to the denominators of both i and j: p_ij = p_ji.
denominators[i] += eval;
More information about the mlpack-svn
mailing list