[mlpack-git] master: Refactor PCA class; able to use different decomposition methods. (5414842)

gitdub at mlpack.org gitdub at mlpack.org
Wed Jul 6 15:30:09 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/98babfc774bce91170df994763b670b9abd20917...e7b9b042d1d6e2d9895d5fa141e9c135b2d2ea57

>---------------------------------------------------------------

commit 541484274e33e0d575243f7b7a72c249bca9615f
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Wed Jul 6 01:05:53 2016 +0200

    Refactor PCA class; able to use different decomposition methods.


>---------------------------------------------------------------

541484274e33e0d575243f7b7a72c249bca9615f
 src/mlpack/methods/pca/CMakeLists.txt            |   4 +-
 src/mlpack/methods/pca/pca.hpp                   |  82 +++++++++++-----
 src/mlpack/methods/pca/{pca.cpp => pca_impl.hpp} | 102 ++++++++++----------
 src/mlpack/methods/pca/pca_main.cpp              |  75 +++++++++++----
 src/mlpack/tests/pca_test.cpp                    | 116 +++++++++++++++++++----
 5 files changed, 268 insertions(+), 111 deletions(-)

diff --git a/src/mlpack/methods/pca/CMakeLists.txt b/src/mlpack/methods/pca/CMakeLists.txt
index 8023d83..5389ae0 100644
--- a/src/mlpack/methods/pca/CMakeLists.txt
+++ b/src/mlpack/methods/pca/CMakeLists.txt
@@ -2,7 +2,7 @@
 # Anything not in this list will not be compiled into mlpack.
 set(SOURCES
   pca.hpp
-  pca.cpp
+  pca_impl.hpp
 )
 
 # Add directory name to sources.
@@ -14,4 +14,6 @@ endforeach()
 # the parent scope).
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
 
+add_subdirectory(decomposition_policies)
+
 add_cli_executable(pca)
diff --git a/src/mlpack/methods/pca/pca.hpp b/src/mlpack/methods/pca/pca.hpp
index c698110..41c7517 100644
--- a/src/mlpack/methods/pca/pca.hpp
+++ b/src/mlpack/methods/pca/pca.hpp
@@ -1,26 +1,31 @@
 /**
  * @file pca.hpp
  * @author Ajinkya Kale
+ * @author Ryan Curtin
+ * @author Marcus Edel
  *
  * Defines the PCA class to perform Principal Components Analysis on the
- * specified data set.
+ * specified data set. There are many variations on how to do this, so
+ * template parameters allow the selection of different techniques.
  */
 #ifndef MLPACK_METHODS_PCA_PCA_HPP
 #define MLPACK_METHODS_PCA_PCA_HPP
 
 #include <mlpack/core.hpp>
+#include <mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp>
 
 namespace mlpack {
 namespace pca {
 
 /**
- * This class implements principal components analysis (PCA).  This is a common,
- * widely-used technique that is often used for either dimensionality reduction
- * or transforming data into a better basis.  Further information on PCA can be
- * found in almost any statistics or machine learning textbook, and all over the
- * internet.
+ * This class implements principal components analysis (PCA). This is a
+ * common, widely-used technique that is often used for either dimensionality
+ * reduction or transforming data into a better basis.  Further information on
+ * PCA can be found in almost any statistics or machine learning textbook, and
+ * all over the internet.
  */
-class PCA
+template<typename DecompositionPolicy = ExactSVDPolicy>
+class PCAType
 {
  public:
   /**
@@ -29,11 +34,12 @@ class PCA
    *
    * @param scaleData Whether or not to scale the data.
    */
-  PCA(const bool scaleData = false);
+  PCAType(const bool scaleData = false,
+          const DecompositionPolicy& decomposition = DecompositionPolicy());
 
   /**
-   * Apply Principal Component Analysis to the provided data set.  It is safe to
-   * pass the same matrix reference for both data and transformedData.
+   * Apply Principal Component Analysis to the provided data set. It is safe
+   * to pass the same matrix reference for both data and transformedData.
    *
    * @param data Data matrix.
    * @param transformedData Matrix to put results of PCA into.
@@ -43,11 +49,11 @@ class PCA
   void Apply(const arma::mat& data,
              arma::mat& transformedData,
              arma::vec& eigval,
-             arma::mat& eigvec) const;
+             arma::mat& eigvec);
 
   /**
-   * Apply Principal Component Analysis to the provided data set.  It is safe to
-   * pass the same matrix reference for both data and transformedData.
+   * Apply Principal Component Analysis to the provided data set. It is safe
+   * to pass the same matrix reference for both data and transformedData.
    *
    * @param data Data matrix.
    * @param transformedData Matrix to store results of PCA in.
@@ -55,29 +61,29 @@ class PCA
    */
   void Apply(const arma::mat& data,
              arma::mat& transformedData,
-             arma::vec& eigVal) const;
+             arma::vec& eigVal);
 
   /**
-   * Use PCA for dimensionality reduction on the given dataset.  This will save
+   * Use PCA for dimensionality reduction on the given dataset. This will save
    * the newDimension largest principal components of the data and remove the
-   * rest.  The parameter returned is the amount of variance of the data that is
-   * retained; this is a value between 0 and 1.  For instance, a value of 0.9
-   * indicates that 90% of the variance present in the data was retained.
+   * rest. The parameter returned is the amount of variance of the data that
+   * is retained; this is a value between 0 and 1.  For instance, a value of
+   * 0.9 indicates that 90% of the variance present in the data was retained.
    *
    * @param data Data matrix.
    * @param newDimension New dimension of the data.
    * @return Amount of the variance of the data retained (between 0 and 1).
    */
-  double Apply(arma::mat& data, const size_t newDimension) const;
+  double Apply(arma::mat& data, const size_t newDimension);
 
   //! This overload is here to make sure int gets casted right to size_t.
-  inline double Apply(arma::mat& data, const int newDimension) const
+  inline double Apply(arma::mat& data, const int newDimension)
   {
     return Apply(data, size_t(newDimension));
   }
 
   /**
-   * Use PCA for dimensionality reduction on the given dataset.  This will save
+   * Use PCA for dimensionality reduction on the given dataset. This will save
    * as many dimensions as necessary to retain at least the given amount of
    * variance (specified by parameter varRetained).  The amount should be
    * between 0 and 1; if the amount is 0, then only 1 dimension will be
@@ -91,23 +97,51 @@ class PCA
    *     between 0 and 1.
    * @return Actual amount of variance retained (between 0 and 1).
    */
-  double Apply(arma::mat& data, const double varRetained) const;
+  double Apply(arma::mat& data, const double varRetained);
 
-  //! Get whether or not this PCA object will scale (by standard deviation) the
-  //! data when PCA is performed.
+  //! Get whether or not this PCA object will scale (by standard deviation)
+  //! the data when PCA is performed.
   bool ScaleData() const { return scaleData; }
   //! Modify whether or not this PCA object will scale (by standard deviation)
   //! the data when PCA is performed.
   bool& ScaleData() { return scaleData; }
 
  private:
+  //! Scaling the data is when we reduce the variance of each dimension to 1.
+  void ScaleData(arma::mat& centeredData)
+  {
+    if (scaleData)
+    {
+      // Scaling the data is when we reduce the variance of each dimension
+      // to 1. We do this by dividing each dimension by its standard
+      // deviation.
+      arma::vec stdDev = arma::stddev(
+          centeredData, 0, 1 /* for each dimension */);
+
+      // If there are any zeroes, make them very small.
+      for (size_t i = 0; i < stdDev.n_elem; ++i)
+        if (stdDev[i] == 0)
+          stdDev[i] = 1e-50;
+
+      centeredData /= arma::repmat(stdDev, 1, centeredData.n_cols);
+    }
+  }
+
   //! Whether or not the data will be scaled by standard deviation when PCA is
   //! performed.
   bool scaleData;
 
+  //! Decomposition method used to perform principal components analysis.
+  DecompositionPolicy decomposition;
 }; // class PCA
 
+//! 3.0.0 TODO: break backward compatibility by renaming PCAType to PCA.
+typedef PCAType<ExactSVDPolicy> PCA;
+
 } // namespace pca
 } // namespace mlpack
 
+// Include implementation.
+#include "pca_impl.hpp"
+
 #endif
diff --git a/src/mlpack/methods/pca/pca.cpp b/src/mlpack/methods/pca/pca_impl.hpp
similarity index 66%
rename from src/mlpack/methods/pca/pca.cpp
rename to src/mlpack/methods/pca/pca_impl.hpp
index 62c78de..9e3a89c 100644
--- a/src/mlpack/methods/pca/pca.cpp
+++ b/src/mlpack/methods/pca/pca_impl.hpp
@@ -1,19 +1,29 @@
 /**
  * @file pca.cpp
  * @author Ajinkya Kale
+ * @author Ryan Curtin
+ * @author Marcus Edel
  *
  * Implementation of PCA class to perform Principal Components Analysis on the
  * specified data set.
  */
-#include "pca.hpp"
+
+#ifndef MLPACK_METHODS_PCA_PCA_IMPL_HPP
+#define MLPACK_METHODS_PCA_PCA_IMPL_HPP
+
 #include <mlpack/core.hpp>
+#include "pca.hpp"
 
 using namespace std;
-using namespace mlpack;
-using namespace mlpack::pca;
 
-PCA::PCA(const bool scaleData) :
-    scaleData(scaleData)
+namespace mlpack {
+namespace pca {
+
+template<typename DecompositionPolicy>
+PCAType<DecompositionPolicy>::PCAType(const bool scaleData,
+                                      const DecompositionPolicy& decomposition) :
+    scaleData(scaleData),
+    decomposition(decomposition)
 { }
 
 /**
@@ -24,54 +34,23 @@ PCA::PCA(const bool scaleData) :
  * @param eigVal - contains eigen values in a column vector
  * @param coeff - PCA Loadings/Coeffs/EigenVectors
  */
-void PCA::Apply(const arma::mat& data,
-                arma::mat& transformedData,
-                arma::vec& eigVal,
-                arma::mat& coeff) const
+template<typename DecompositionPolicy>
+void PCAType<DecompositionPolicy>::Apply(const arma::mat& data,
+                                         arma::mat& transformedData,
+                                         arma::vec& eigVal,
+                                         arma::mat& coeff)
 {
   Timer::Start("pca");
 
-  // This matrix will store the right singular values; we do not need them.
-  arma::mat v;
-
   // Center the data into a temporary matrix.
   arma::mat centeredData;
   math::Center(data, centeredData);
 
-  if (scaleData)
-  {
-    // Scaling the data is when we reduce the variance of each dimension to 1.
-    // We do this by dividing each dimension by its standard deviation.
-    arma::vec stdDev = arma::stddev(centeredData, 0, 1 /* for each dimension */);
-
-    // If there are any zeroes, make them very small.
-    for (size_t i = 0; i < stdDev.n_elem; ++i)
-      if (stdDev[i] == 0)
-        stdDev[i] = 1e-50;
-
-    centeredData /= arma::repmat(stdDev, 1, centeredData.n_cols);
-  }
-
-  // Do singular value decomposition.  Use the economical singular value
-  // decomposition if the columns are much larger than the rows.
-  if (data.n_rows < data.n_cols)
-  {
-    // Do economical singular value decomposition and compute only the left
-    // singular vectors.
-    arma::svd_econ(coeff, eigVal, v, centeredData, 'l');
-  }
-  else
-  {
-    arma::svd(coeff, eigVal, v, centeredData);
-  }
-
-  // Now we must square the singular values to get the eigenvalues.
-  // In addition we must divide by the number of points, because the covariance
-  // matrix is X * X' / (N - 1).
-  eigVal %= eigVal / (data.n_cols - 1);
+  // Scale the data if the user asked for it.
+  ScaleData(centeredData);
 
-  // Project the samples to the principals.
-  transformedData = arma::trans(coeff) * centeredData;
+  decomposition.Apply(data, centeredData, transformedData, eigVal, coeff,
+      data.n_rows);
 
   Timer::Stop("pca");
 }
@@ -83,9 +62,10 @@ void PCA::Apply(const arma::mat& data,
  * @param transformedData - Data with PCA applied
  * @param eigVal - contains eigen values in a column vector
  */
-void PCA::Apply(const arma::mat& data,
-                arma::mat& transformedData,
-                arma::vec& eigVal) const
+template<typename DecompositionPolicy>
+void PCAType<DecompositionPolicy>::Apply(const arma::mat& data,
+                                         arma::mat& transformedData,
+                                         arma::vec& eigVal)
 {
   arma::mat coeffs;
   Apply(data, transformedData, eigVal, coeffs);
@@ -102,7 +82,9 @@ void PCA::Apply(const arma::mat& data,
  * @param newDimension New dimension of the data.
  * @return Amount of the variance of the data retained (between 0 and 1).
  */
-double PCA::Apply(arma::mat& data, const size_t newDimension) const
+template<typename DecompositionPolicy>
+double PCAType<DecompositionPolicy>::Apply(arma::mat& data,
+                                           const size_t newDimension)
 {
   // Parameter validation.
   if (newDimension == 0)
@@ -116,7 +98,16 @@ double PCA::Apply(arma::mat& data, const size_t newDimension) const
   arma::mat coeffs;
   arma::vec eigVal;
 
-  Apply(data, data, eigVal, coeffs);
+  Timer::Start("pca");
+
+  // Center the data into a temporary matrix.
+  arma::mat centeredData;
+  math::Center(data, centeredData);
+
+  // Scale the data if the user asked for it.
+  ScaleData(centeredData);
+
+  decomposition.Apply(data, centeredData, data, eigVal, coeffs, newDimension);
 
   if (newDimension < coeffs.n_rows)
     // Drop unnecessary rows.
@@ -126,6 +117,8 @@ double PCA::Apply(arma::mat& data, const size_t newDimension) const
   // the right dimension before calculating the amount of variance retained.
   double eigDim = std::min(newDimension - 1, (size_t) eigVal.n_elem - 1);
 
+  Timer::Stop("pca");
+
   // Calculate the total amount of variance retained.
   return (sum(eigVal.subvec(0, eigDim)) / sum(eigVal));
 }
@@ -140,7 +133,9 @@ double PCA::Apply(arma::mat& data, const size_t newDimension) const
  * The method returns the actual amount of variance retained, which will
  * always be greater than or equal to the varRetained parameter.
  */
-double PCA::Apply(arma::mat& data, const double varRetained) const
+template<typename DecompositionPolicy>
+double PCAType<DecompositionPolicy>::Apply(arma::mat& data,
+                                           const double varRetained)
 {
   // Parameter validation.
   if (varRetained < 0)
@@ -171,3 +166,8 @@ double PCA::Apply(arma::mat& data, const double varRetained) const
 
   return varSum;
 }
+
+} // namespace pca
+} // namespace mlpack
+
+#endif
\ No newline at end of file
diff --git a/src/mlpack/methods/pca/pca_main.cpp b/src/mlpack/methods/pca/pca_main.cpp
index 8bbbedf..9079391 100644
--- a/src/mlpack/methods/pca/pca_main.cpp
+++ b/src/mlpack/methods/pca/pca_main.cpp
@@ -1,12 +1,16 @@
 /**
  * @file pca_main.cpp
  * @author Ryan Curtin
+ * @author Marcus Edel
  *
  * Main executable to run PCA.
  */
 #include <mlpack/core.hpp>
 
 #include "pca.hpp"
+#include <mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp>
+#include <mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp>
+#include <mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp>
 
 using namespace mlpack;
 using namespace mlpack::pca;
@@ -14,10 +18,10 @@ using namespace std;
 
 // Document program.
 PROGRAM_INFO("Principal Components Analysis", "This program performs principal "
-    "components analysis on the given dataset.  It will transform the data "
-    "onto its principal components, optionally performing dimensionality "
-    "reduction by ignoring the principal components with the smallest "
-    "eigenvalues.");
+    "components analysis on the given dataset using the exact, randomized or "
+    "QUIC SVD method. It will transform the data onto its principal components,"
+    " optionally performing dimensionality reduction by ignoring the principal "
+    "components with the smallest eigenvalues.");
 
 // Parameters for program.
 PARAM_STRING_REQ("input_file", "Input dataset to perform PCA on.", "i");
@@ -31,6 +35,40 @@ PARAM_DOUBLE("var_to_retain", "Amount of variance to retain; should be between "
 PARAM_FLAG("scale", "If set, the data will be scaled before running PCA, such "
     "that the variance of each feature is 1.", "s");
 
+PARAM_STRING("decomposition_method", "Method used for the principal "
+    "components analysis: 'exact', 'randomized', 'quic'.", "c", "exact");
+
+
+//! Run PCA on the specified dataset with the given decomposition method.
+template<typename DecompositionPolicy>
+void RunPCA(arma::mat& dataset,
+            const size_t newDimension,
+            const size_t scale,
+            const double varToRetain)
+{
+  PCAType<DecompositionPolicy> p(scale);
+
+  Log::Info << "Performing PCA on dataset..." << endl;
+  double varRetained;
+
+  if (varToRetain != 0)
+  {
+    if (newDimension != 0)
+      Log::Warn << "New dimensionality (-d) ignored because -V was specified."
+          << endl;
+
+    varRetained = p.Apply(dataset, varToRetain);
+  }
+  else
+  {
+    varRetained = p.Apply(dataset, newDimension);
+  }
+
+  Log::Info << (varRetained * 100) << "% of variance retained (" <<
+      dataset.n_rows << " dimensions)." << endl;
+
+}
+
 int main(int argc, char** argv)
 {
   // Parse commandline.
@@ -57,27 +95,30 @@ int main(int argc, char** argv)
 
   // Get the options for running PCA.
   const size_t scale = CLI::HasParam("scale");
+  const double varToRetain = CLI::GetParam<double>("var_to_retain");
+  const string decompositionMethod = CLI::GetParam<string>(
+      "decomposition_method");
 
   // Perform PCA.
-  PCA p(scale);
-  Log::Info << "Performing PCA on dataset..." << endl;
-  double varRetained;
-  if (CLI::GetParam<double>("var_to_retain") != 0)
+  if (decompositionMethod == "exact")
   {
-    if (CLI::GetParam<int>("new_dimensionality") != 0)
-      Log::Warn << "New dimensionality (-d) ignored because --var_to_retain was"
-          << " specified." << endl;
-
-    varRetained = p.Apply(dataset, CLI::GetParam<double>("var_to_retain"));
+    RunPCA<ExactSVDPolicy>(dataset, newDimension, scale, varToRetain);
+  }
+  else if(decompositionMethod == "randomized")
+  {
+    RunPCA<RandomizedSVDPolicy>(dataset, newDimension, scale, varToRetain);
+  }
+  else if(decompositionMethod == "quic")
+  {
+    RunPCA<QUICSVDPolicy>(dataset, newDimension, scale, varToRetain);
   }
   else
   {
-    varRetained = p.Apply(dataset, newDimension);
+    // Invalid decomposition method.
+    Log::Fatal << "Invalid decomposition method ('" << decompositionMethod
+        << "'); valid choices are 'exact', 'randomized', 'quic'." << endl;
   }
 
-  Log::Info << (varRetained * 100) << "% of variance retained (" <<
-      dataset.n_rows << " dimensions)." << endl;
-
   // Now save the results.
   string outputFile = CLI::GetParam<string>("output_file");
   data::Save(outputFile, dataset);
diff --git a/src/mlpack/tests/pca_test.cpp b/src/mlpack/tests/pca_test.cpp
index e4f3b6a..b8bc88a 100644
--- a/src/mlpack/tests/pca_test.cpp
+++ b/src/mlpack/tests/pca_test.cpp
@@ -1,37 +1,41 @@
 /**
  * @file pca_test.cpp
  * @author Ajinkya Kale
+ * @author Marcus Edel
  *
  * Test file for PCA class.
  */
 #include <mlpack/core.hpp>
 #include <mlpack/methods/pca/pca.hpp>
+#include <mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp>
+#include <mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp>
+#include <mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp>
 
 #include <boost/test/unit_test.hpp>
 #include "test_tools.hpp"
 
 BOOST_AUTO_TEST_SUITE(PCATest);
 
-using namespace std;
 using namespace arma;
 using namespace mlpack;
 using namespace mlpack::pca;
 using namespace mlpack::distribution;
 
-/**
- * Compare the output of our PCA implementation with Armadillo's.
+/*
+ * Compare the output of our PCA implementation with Armadillo's using the
+ * specified decomposition policy.
  */
-BOOST_AUTO_TEST_CASE(ArmaComparisonPCATest)
+template<typename DecompositionPolicy>
+void ArmaComparisonPCA()
 {
-  mat coeff, coeff1;
-  vec eigVal, eigVal1;
-  mat score, score1;
+  arma::mat coeff, coeff1, score, score1;
+  arma::vec eigVal, eigVal1;
 
-  mat data = randu<mat>(100, 100);
+  arma::mat data = arma::randu<arma::mat>(3, 1000);
 
-  PCA p;
+  PCAType<DecompositionPolicy> exactPCA;
+  exactPCA.Apply(data, score1, eigVal1, coeff1);
 
-  p.Apply(data, score1, eigVal1, coeff1);
   princomp(coeff, score, eigVal, trans(data));
 
   // Verify the PCA results based on the eigenvalues.
@@ -44,11 +48,12 @@ BOOST_AUTO_TEST_CASE(ArmaComparisonPCATest)
   }
 }
 
-/**
+/*
  * Test that dimensionality reduction with PCA works the same way MATLAB does
- * (which should be correct!).
+ * (which should be correct!) using the specified decomposition policy.
  */
-BOOST_AUTO_TEST_CASE(PCADimensionalityReductionTest)
+template<typename DecompositionPolicy>
+void PCADimensionalityReduction()
 {
   // Fake, simple dataset.  The results we will compare against are from MATLAB.
   mat data("1 0 2 3 9;"
@@ -56,7 +61,7 @@ BOOST_AUTO_TEST_CASE(PCADimensionalityReductionTest)
            "6 7 3 1 8");
 
   // Now run PCA to reduce the dimensionality.
-  PCA p;
+  PCAType<DecompositionPolicy> p;
   const double varRetained = p.Apply(data, 2); // Reduce to 2 dimensions.
 
   // Compare with correct results.
@@ -87,11 +92,12 @@ BOOST_AUTO_TEST_CASE(PCADimensionalityReductionTest)
 
 /**
  * Test that setting the variance retained parameter to perform dimensionality
- * reduction works.
+ * reduction works using the specified decomposition policy.
  */
-BOOST_AUTO_TEST_CASE(PCAVarianceRetainedTest)
+template<typename DecompositionPolicy>
+void PCAVarianceRetained()
 {
-  // Fake, simple dataset.
+    // Fake, simple dataset.
   mat data("1 0 2 3 9;"
            "5 2 8 4 8;"
            "6 7 3 1 8");
@@ -105,7 +111,7 @@ BOOST_AUTO_TEST_CASE(PCAVarianceRetainedTest)
   // and if we keep two, the actual variance retained is
   //   0.904876047045906
   // and if we keep three, the actual variance retained is 1.
-  PCA p;
+  PCAType<DecompositionPolicy> p;
   arma::mat origData = data;
   double varRetained = p.Apply(data, 0.1);
 
@@ -150,6 +156,80 @@ BOOST_AUTO_TEST_CASE(PCAVarianceRetainedTest)
 }
 
 /**
+ * Compare the output of our exact PCA implementation with Armadillo's.
+ */
+BOOST_AUTO_TEST_CASE(ArmaComparisonExactPCATest)
+{
+  ArmaComparisonPCA<ExactSVDPolicy>();
+}
+
+/**
+ * Compare the output of our randomized-SVD PCA implementation with Armadillo's.
+ */
+BOOST_AUTO_TEST_CASE(ArmaComparisonRandomizedPCATest)
+{
+  ArmaComparisonPCA<RandomizedSVDPolicy>();
+}
+
+/**
+ * Test that dimensionality reduction with exact-svd PCA works the same way
+ * MATLAB does (which should be correct!).
+ */
+BOOST_AUTO_TEST_CASE(ExactPCADimensionalityReductionTest)
+{
+  PCADimensionalityReduction<ExactSVDPolicy>();
+}
+
+/**
+ * Test that dimensionality reduction with randomized-svd PCA works the same way
+ * MATLAB does (which should be correct!).
+ */
+BOOST_AUTO_TEST_CASE(RandomizedPCADimensionalityReductionTest)
+{
+  PCADimensionalityReduction<RandomizedSVDPolicy>();
+}
+
+/**
+ * Test that dimensionality reduction with QUIC-SVD PCA works the same way
+ * as the Exact-SVD PCA method.
+ */
+BOOST_AUTO_TEST_CASE(QUICPCADimensionalityReductionTest)
+{
+  arma::mat data, data1;
+  data::Load("test_data_3_1000.csv", data);
+  data1 = data;
+
+  PCAType<ExactSVDPolicy> exactPCA;
+  const double varRetainedExact = exactPCA.Apply(data, 1);
+
+  PCAType<QUICSVDPolicy> quicPCA;
+  const double varRetainedQUIC = quicPCA.Apply(data1, 1);
+
+  BOOST_REQUIRE_CLOSE(varRetainedExact, varRetainedQUIC, 4.0);
+
+  BOOST_REQUIRE_EQUAL(data.n_rows, data1.n_rows);
+  BOOST_REQUIRE_EQUAL(data.n_cols, data1.n_cols);
+}
+
+/**
+ * Test that setting the variance retained parameter to perform dimensionality
+ * reduction works using the exact svd PCA method.
+ */
+BOOST_AUTO_TEST_CASE(ExactPCAVarianceRetainedTest)
+{
+  PCAVarianceRetained<ExactSVDPolicy>();
+}
+
+/**
+ * Test that setting the variance retained parameter to perform dimensionality
+ * reduction works using the randomized svd PCA method.
+ */
+BOOST_AUTO_TEST_CASE(RandomizedPCAVarianceRetainedTest)
+{
+  PCAVarianceRetained<RandomizedSVDPolicy>();
+}
+
+/**
  * Test that scaling PCA works.
  */
 BOOST_AUTO_TEST_CASE(PCAScalingTest)




More information about the mlpack-git mailing list