[mlpack-svn] r15763 - mlpack/trunk/src/mlpack/methods/pca

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Sep 11 18:15:09 EDT 2013


Author: rcurtin
Date: Wed Sep 11 18:15:09 2013
New Revision: 15763

Log:
Add -V (var_to_retain) option to specify the fraction of variance to retain (between 0 and 1); when given, it overrides -d.


Modified:
   mlpack/trunk/src/mlpack/methods/pca/pca_main.cpp

Modified: mlpack/trunk/src/mlpack/methods/pca/pca_main.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/pca/pca_main.cpp	(original)
+++ mlpack/trunk/src/mlpack/methods/pca/pca_main.cpp	Wed Sep 11 18:15:09 2013
@@ -24,6 +24,8 @@
 PARAM_STRING_REQ("output_file", "File to save modified dataset to.", "o");
 PARAM_INT("new_dimensionality", "Desired dimensionality of output dataset.  If "
     "0, no dimensionality reduction is performed.", "d", 0);
+PARAM_DOUBLE("var_to_retain", "Amount of variance to retain; should be between "
+    "0 and 1.  If 1, all variance is retained.  Overrides -d.", "V", 0);
 
 PARAM_FLAG("scale", "If set, the data will be scaled before running PCA, such "
     "that the variance of each feature is 1.", "s");
@@ -36,7 +38,7 @@
   // Load input dataset.
   string inputFile = CLI::GetParam<string>("input_file");
   arma::mat dataset;
-  data::Load(inputFile.c_str(), dataset);
+  data::Load(inputFile, dataset);
 
   // Find out what dimension we want.
   size_t newDimension = dataset.n_rows; // No reduction, by default.
@@ -58,7 +60,22 @@
   // Perform PCA.
   PCA p(scale);
   Log::Info << "Performing PCA on dataset..." << endl;
-  p.Apply(dataset, newDimension);
+  double varRetained;
+  if (CLI::GetParam<double>("var_to_retain") != 0)
+  {
+    if (CLI::GetParam<int>("new_dimensionality") != 0)
+      Log::Warn << "New dimensionality (-d) ignored because -V was specified."
+          << endl;
+
+    varRetained = p.Apply(dataset, CLI::GetParam<double>("var_to_retain"));
+  }
+  else
+  {
+    varRetained = p.Apply(dataset, newDimension);
+  }
+
+  Log::Info << (varRetained * 100) << "% of variance retained (" <<
+      dataset.n_rows << " dimensions)." << endl;
 
   // Now save the results.
   string outputFile = CLI::GetParam<string>("output_file");



More information about the mlpack-svn mailing list