[mlpack-svn] r10810 - mlpack/trunk/src/mlpack/methods/nca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Dec 14 17:28:08 EST 2011
Author: rcurtin
Date: 2011-12-14 17:28:08 -0500 (Wed, 14 Dec 2011)
New Revision: 10810
Modified:
mlpack/trunk/src/mlpack/methods/nca/nca.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
Log:
Document NCA.
Modified: mlpack/trunk/src/mlpack/methods/nca/nca.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca.hpp 2011-12-14 22:13:14 UTC (rev 10809)
+++ mlpack/trunk/src/mlpack/methods/nca/nca.hpp 2011-12-14 22:28:08 UTC (rev 10810)
@@ -11,7 +11,7 @@
#include <mlpack/core/metrics/lmetric.hpp>
namespace mlpack {
-namespace nca {
+namespace nca /** Neighborhood Components Analysis. */ {
/**
* An implementation of Neighborhood Components Analysis, both a linear
@@ -24,6 +24,7 @@
*
* For more details, see the following published paper:
*
+ * @code
* @inproceedings{Goldberger2004,
* author = {Goldberger, Jacob and Roweis, Sam and Hinton, Geoff and
* Salakhutdinov, Ruslan},
@@ -33,6 +34,7 @@
* title = {{Neighbourhood Components Analysis}},
* year = {2004}
* }
+ * @endcode
*/
template<typename Kernel>
class NCA
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp 2011-12-14 22:13:14 UTC (rev 10809)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_impl.hpp 2011-12-14 22:28:08 UTC (rev 10810)
@@ -32,7 +32,11 @@
// We will use the L-BFGS optimizer to optimize the stretching matrix.
optimization::L_BFGS<SoftmaxErrorFunction<Kernel> > lbfgs(error_func, 10);
+ Timer::Start("nca_lbfgs_optimization");
+
lbfgs.Optimize(0, output_matrix);
+
+ Timer::Stop("nca_lbfgs_optimization");
}
}; // namespace nca
Modified: mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp 2011-12-14 22:13:14 UTC (rev 10809)
+++ mlpack/trunk/src/mlpack/methods/nca/nca_main.cpp 2011-12-14 22:28:08 UTC (rev 10810)
@@ -10,35 +10,80 @@
#include "nca.hpp"
// Define parameters.
-PROGRAM_INFO("Neighborhood Components Analysis",
- "documentation not done yet");
+PROGRAM_INFO("Neighborhood Components Analysis (NCA)",
+ "This program implements Neighborhood Components Analysis, both a linear "
+ "dimensionality reduction technique and a distance learning technique. The"
+ " method seeks to improve k-nearest-neighbor classification on a dataset "
+ "by scaling the dimensions. The method is nonparametric, and does not "
+ "require a value of k. It works by using stochastic (\"soft\") neighbor "
+ "assignments and using optimization techniques over the gradient of the "
+ "accuracy of the neighbor assignments.\n"
+ "\n"
+ "For more details, see the following published paper:\n\n"
+ "@inproceedings{\n"
+ " author = {Goldberge, Jacob and Roweis, Sam and Hinton, Geoff and\n"
+ " Salakhutdinov, Ruslan},\n"
+ " booktitle = {Advances in Neural Information Processing Systems 17},\n"
+ " pages = {513--520},\n"
+ " publisher = {MIT Press},\n"
+ " title = {{Neighbourhood Components Analysis}},\n"
+ " year = {2004}\n"
+ "}\n"
+ "\n"
+ "To work, this algorithm needs labeled data. It can be given as the last "
+ "row of the input dataset (--input_file), or alternatively in a separate "
+ "file (--labels_file).");
-PARAM_STRING_REQ("input_file", "Input dataset to run NCA on.", "I");
-PARAM_STRING_REQ("output_file", "Output file for learned distance matrix.", "O");
+PARAM_STRING_REQ("input_file", "Input dataset to run NCA on.", "i");
+PARAM_STRING_REQ("output_file", "Output file for learned distance matrix.",
+ "o");
+PARAM_STRING("labels_file", "File of labels for input dataset.", "l", "");
using namespace mlpack;
using namespace mlpack::nca;
using namespace mlpack::metric;
using namespace std;
+using namespace arma;
int main(int argc, char* argv[])
{
+ // Parse command line.
CLI::ParseCommandLine(argc, argv);
- arma::mat data;
- data::Load(CLI::GetParam<string>("input_file").c_str(), data, true);
+ const string inputFile = CLI::GetParam<string>("input_file");
+ const string labelsFile = CLI::GetParam<string>("labels_file");
+ const string outputFile = CLI::GetParam<string>("output_file");
- arma::uvec labels(data.n_cols);
- for (size_t i = 0; i < data.n_cols; i++)
- labels[i] = (int) data(data.n_rows - 1, i);
+ // Load data.
+ mat data;
+ data::Load(inputFile.c_str(), data, true);
- data.shed_row(data.n_rows - 1);
+ // Do we want to load labels separately?
+ umat labels(data.n_cols, 1);
+ if (labelsFile != "")
+ {
+ data::Load(labelsFile.c_str(), labels, true);
- NCA<LMetric<2> > nca(data, labels);
+ if (labels.n_rows == 1)
+ labels = trans(labels);
- arma::mat distance;
+ if (labels.n_cols > 1)
+ Log::Fatal << "Labels must have only one column or row!" << endl;
+ }
+ else
+ {
+ for (size_t i = 0; i < data.n_cols; i++)
+ labels[i] = (int) data(data.n_rows - 1, i);
+ data.shed_row(data.n_rows - 1);
+ }
+
+ // Now create the NCA object and run the optimization.
+ NCA<LMetric<2> > nca(data, labels.unsafe_col(0));
+
+ mat distance;
nca.LearnDistance(distance);
+ // Save the output.
data::Save(CLI::GetParam<string>("output_file").c_str(), distance, true);
}
More information about the mlpack-svn mailing list