[mlpack-svn] r13800 - mlpack/trunk/src/mlpack/methods/nca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Oct 31 15:03:24 EDT 2012
Author: rcurtin
Date: 2012-10-31 15:03:24 -0400 (Wed, 31 Oct 2012)
New Revision: 13800
Modified:
mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
Log:
Fix formatting issues and prepare for new functions.
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp 2012-10-31 18:13:08 UTC (rev 13799)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function.hpp 2012-10-31 19:03:24 UTC (rev 13800)
@@ -24,11 +24,16 @@
* where x_n represents a point and A is the current scaling matrix.
*
* This class is more flexible than the original paper, allowing an arbitrary
- * kernel function to be used, meaning that the Mahalanobis distance is not the
- * only allowed way to run NCA. However, the Mahalanobis distance is probably
- * the best way to use this.
+ * metric function to be used in place of || A x_i - A x_j ||^2, meaning that
+ * the squared Euclidean distance is not the only allowed metric for NCA.
+ * However, that is probably the best way to use this class.
+ *
+ * In addition to the standard Evaluate() and Gradient() functions which MLPACK
+ * optimizers use, overloads of Evaluate() and Gradient() are given which only
+ * operate on one point in the dataset. This is useful for optimizers like
+ * stochastic gradient descent (see mlpack::optimization::SGD).
*/
-template<typename Kernel>
+template<typename MetricType = metric::SquaredEuclideanDistance>
class SoftmaxErrorFunction
{
public:
@@ -44,10 +49,12 @@
*/
SoftmaxErrorFunction(const arma::mat& dataset,
const arma::uvec& labels,
- Kernel kernel = Kernel());
+ MetricType metric = MetricType());
/**
- * Evaluate the softmax function for the given covariance matrix.
+ * Evaluate the softmax function for the given covariance matrix. This is the
+ * non-separable implementation, where the objective function is not
+ * decomposed into the sum of several objective functions.
*
* @param covariance Covariance matrix of Mahalanobis distance.
*/
@@ -55,7 +62,8 @@
/**
* Evaluate the gradient of the softmax function for the given covariance
- * matrix.
+ * matrix. This is the non-separable implementation, where the objective
+ * function is not decomposed into the sum of several objective functions.
*
* @param covariance Covariance matrix of Mahalanobis distance.
* @param gradient Matrix to store the calculated gradient in.
@@ -68,23 +76,29 @@
const arma::mat GetInitialPoint() const;
private:
- const arma::mat& dataset_;
- const arma::uvec& labels_;
+ const arma::mat& dataset;
+ const arma::uvec& labels;
- Kernel kernel_;
+ MetricType metric;
- arma::mat last_coordinates_;
- arma::mat stretched_dataset_;
- arma::vec p_; // Holds calculated p_i.
- arma::vec denominators_; // Holds denominators for calculation of p_ij.
+ //! Last coordinates. Used for the non-separable Evaluate() and Gradient().
+ arma::mat lastCoordinates;
+ //! Stretched dataset. Used for the non-separable Evaluate() and Gradient().
+ arma::mat stretchedDataset;
+ //! Holds calculated p_i, for the non-separable Evaluate() and Gradient().
+ arma::vec p;
+ //! Holds denominators for calculation of p_ij, for the non-separable
+ //! Evaluate() and Gradient().
+ arma::vec denominators;
- //! False is nothing has ever been precalculated (only at construction time).
- bool precalculated_;
+ //! False if nothing has ever been precalculated (only at construction time).
+ bool precalculated;
/**
* Precalculate the denominators and numerators that will make up the p_ij,
* but only if the coordinates matrix is different than the last coordinates
- * the Precalculate() method was run with.
+ * the Precalculate() method was run with. This method is only called by the
+ * non-separable Evaluate() and Gradient().
*
* This will update last_coordinates_ and stretched_dataset_, and also
* calculate the p_i and denominators_ which are used in the calculation of
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp 2012-10-31 18:13:08 UTC (rev 13799)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_softmax_error_function_impl.hpp 2012-10-31 19:03:24 UTC (rev 13800)
@@ -14,28 +14,29 @@
namespace nca {
// Initialize with the given kernel.
-template<typename Kernel>
-SoftmaxErrorFunction<Kernel>::SoftmaxErrorFunction(const arma::mat& dataset,
- const arma::uvec& labels,
- Kernel kernel) :
- dataset_(dataset), labels_(labels), kernel_(kernel),
- last_coordinates_(dataset.n_rows, dataset.n_rows),
- precalculated_(false)
+template<typename MetricType>
+SoftmaxErrorFunction<MetricType>::SoftmaxErrorFunction(const arma::mat& dataset,
+ const arma::uvec& labels,
+ MetricType metric) :
+ dataset(dataset),
+ labels(labels),
+ metric(metric),
+ precalculated(false)
{ /* nothing to do */ }
-template<typename Kernel>
-double SoftmaxErrorFunction<Kernel>::Evaluate(const arma::mat& coordinates)
+template<typename MetricType>
+double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates)
{
// Calculate the denominators and numerators, if necessary.
Precalculate(coordinates);
- return -accu(p_); // Sum of p_i for all i. We negate because our solver
+ return -accu(p); // Sum of p_i for all i. We negate because our solver
// minimizes, not maximizes.
};
-template<typename Kernel>
-void SoftmaxErrorFunction<Kernel>::Gradient(const arma::mat& coordinates,
- arma::mat& gradient)
+template<typename MetricType>
+void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
+ arma::mat& gradient)
{
// Calculate the denominators and numerators, if necessary.
Precalculate(coordinates);
@@ -53,26 +54,26 @@
// otherwise, add
// (p_i p_ik + p_k p_ki) x_ik x_ik^T
arma::mat sum;
- sum.zeros(stretched_dataset_.n_rows, stretched_dataset_.n_rows);
- for (size_t i = 0; i < stretched_dataset_.n_cols; i++)
+ sum.zeros(stretchedDataset.n_rows, stretchedDataset.n_rows);
+ for (size_t i = 0; i < stretchedDataset.n_cols; i++)
{
- for (size_t k = (i + 1); k < stretched_dataset_.n_cols; k++)
+ for (size_t k = (i + 1); k < stretchedDataset.n_cols; k++)
{
// Calculate p_ik and p_ki first.
- double eval = exp(-kernel_.Evaluate(stretched_dataset_.unsafe_col(i),
- stretched_dataset_.unsafe_col(k)));
+ double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+ stretchedDataset.unsafe_col(k)));
double p_ik = 0, p_ki = 0;
- p_ik = eval / denominators_(i);
- p_ki = eval / denominators_(k);
+ p_ik = eval / denominators(i);
+ p_ki = eval / denominators(k);
// Subtract x_i from x_k. We are not using stretched points here.
- arma::vec x_ik = dataset_.col(i) - dataset_.col(k);
- arma::mat second_term = (x_ik * trans(x_ik));
+ arma::vec x_ik = dataset.col(i) - dataset.col(k);
+ arma::mat secondTerm = (x_ik * trans(x_ik));
- if (labels_[i] == labels_[k])
- sum += ((p_[i] - 1) * p_ik + (p_[k] - 1) * p_ki) * second_term;
+ if (labels[i] == labels[k])
+ sum += ((p[i] - 1) * p_ik + (p[k] - 1) * p_ki) * secondTerm;
else
- sum += (p_[i] * p_ik + p_[k] * p_ki) * second_term;
+ sum += (p[i] * p_ik + p[k] * p_ki) * secondTerm;
}
}
@@ -80,23 +81,27 @@
gradient = -2 * coordinates * sum;
}
-template<typename Kernel>
-const arma::mat SoftmaxErrorFunction<Kernel>::GetInitialPoint() const
+template<typename MetricType>
+const arma::mat SoftmaxErrorFunction<MetricType>::GetInitialPoint() const
{
- return arma::eye<arma::mat>(dataset_.n_rows, dataset_.n_rows);
+ return arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
}
-template<typename Kernel>
-void SoftmaxErrorFunction<Kernel>::Precalculate(const arma::mat& coordinates)
+template<typename MetricType>
+void SoftmaxErrorFunction<MetricType>::Precalculate(
+ const arma::mat& coordinates)
{
+ // Ensure it is the right size.
+ lastCoordinates.set_size(coordinates.n_rows, coordinates.n_cols);
+
// Make sure the calculation is necessary.
- if ((accu(coordinates == last_coordinates_) == coordinates.n_elem) &&
- precalculated_)
+ if ((accu(coordinates == lastCoordinates) == coordinates.n_elem) &&
+ precalculated)
return; // No need to calculate; we already have this stuff saved.
// Coordinates are different; save the new ones, and stretch the dataset.
- last_coordinates_ = coordinates;
- stretched_dataset_ = coordinates * dataset_;
+ lastCoordinates = coordinates;
+ stretchedDataset = coordinates * dataset;
// For each point i, we must evaluate the softmax function:
// p_ij = exp( -K(x_i, x_j) ) / ( sum_{k != i} ( exp( -K(x_i, x_k) )))
@@ -104,47 +109,47 @@
// We will do this by keeping track of the denominators for each i as well as
// the numerators (the sum for all j in class of i). This will be on the
// order of O((n * (n + 1)) / 2), which really isn't all that great.
- p_.zeros(stretched_dataset_.n_cols);
- denominators_.zeros(stretched_dataset_.n_cols);
- for (size_t i = 0; i < stretched_dataset_.n_cols; i++)
+ p.zeros(stretchedDataset.n_cols);
+ denominators.zeros(stretchedDataset.n_cols);
+ for (size_t i = 0; i < stretchedDataset.n_cols; i++)
{
- for (size_t j = (i + 1); j < stretched_dataset_.n_cols; j++)
+ for (size_t j = (i + 1); j < stretchedDataset.n_cols; j++)
{
- // Evaluate exp(-K(x_i, x_j)).
- double eval = exp(-kernel_.Evaluate(stretched_dataset_.unsafe_col(i),
- stretched_dataset_.unsafe_col(j)));
+ // Evaluate exp(-d(x_i, x_j)).
+ double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
+ stretchedDataset.unsafe_col(j)));
// Add this to the denominators of both i and j: p_ij = p_ji.
- denominators_[i] += eval;
- denominators_[j] += eval;
+ denominators[i] += eval;
+ denominators[j] += eval;
// If i and j are the same class, add to numerator of both.
- if (labels_[i] == labels_[j])
+ if (labels[i] == labels[j])
{
- p_[i] += eval;
- p_[j] += eval;
+ p[i] += eval;
+ p[j] += eval;
}
}
}
// Divide p_i by their denominators.
- p_ /= denominators_;
+ p /= denominators;
// Clean up any bad values.
- for (size_t i = 0; i < stretched_dataset_.n_cols; i++)
+ for (size_t i = 0; i < stretchedDataset.n_cols; i++)
{
- if (denominators_[i] == 0.0)
+ if (denominators[i] == 0.0)
{
Log::Debug << "Denominator of p_{" << i << ", j} is 0." << std::endl;
// Set to usable values.
- denominators_[i] = std::numeric_limits<double>::infinity();
- p_[i] = 0;
+ denominators[i] = std::numeric_limits<double>::infinity();
+ p[i] = 0;
}
}
// We've done a precalculation. Mark it as done.
- precalculated_ = true;
+ precalculated = true;
}
}; // namespace nca
More information about the mlpack-svn
mailing list