[mlpack-git] master: Add exact, randomized and QUIC SVD decomposition policies; meant to be used in conjunction with the PCA class. (057b1c3)

gitdub at mlpack.org gitdub at mlpack.org
Wed Jul 6 15:30:09 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/98babfc774bce91170df994763b670b9abd20917...e7b9b042d1d6e2d9895d5fa141e9c135b2d2ea57

>---------------------------------------------------------------

commit 057b1c35206cf68ace238b26d920f5cbe32af926
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Wed Jul 6 01:04:31 2016 +0200

    Add exact, randomized and QUIC SVD decomposition policies; meant to be used in conjunction with the PCA class.


>---------------------------------------------------------------

057b1c35206cf68ace238b26d920f5cbe32af926
 .../decomposition_policies}/CMakeLists.txt         |  5 +-
 .../decomposition_policies/exact_svd_method.hpp    | 72 ++++++++++++++++
 .../pca/decomposition_policies/quic_svd_method.hpp | 92 ++++++++++++++++++++
 .../randomized_svd_method.hpp                      | 97 ++++++++++++++++++++++
 4 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt b/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt
similarity index 85%
copy from src/mlpack/methods/ann/pooling_rules/CMakeLists.txt
copy to src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt
index 99b6b80..968c7cc 100644
--- a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt
+++ b/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt
@@ -1,8 +1,9 @@
 # Define the files we need to compile
 # Anything not in this list will not be compiled into mlpack.
 set(SOURCES
-  max_pooling.hpp
-  mean_pooling.hpp
+  exact_svd_method.hpp
+  randomized_svd_method.hpp
+  quic_svd_method.hpp
 )
 
 # Add directory name to sources.
diff --git a/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp
new file mode 100644
index 0000000..7b24b15
--- /dev/null
+++ b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp
@@ -0,0 +1,72 @@
+/**
+ * @file exact_svd_method.hpp
+ * @author Ajinkya Kale
+ * @author Ryan Curtin
+ * @author Marcus Edel
+ *
+ * Implementation of the exact SVD method for use in the Principal Components
+ * Analysis method.
+ */
+
+#ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_EXACT_SVD_METHOD_HPP
+#define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_EXACT_SVD_METHOD_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace pca {
+
+/**
+ * Implementation of the exact SVD policy.
+ */
+class ExactSVDPolicy
+{
+  public:
+  /**
+   * Apply Principal Component Analysis to the provided data set using the
+   * exact SVD method.
+   *
+   * @param data Data matrix.
+   * @param centeredData Centered data matrix.
+   * @param transformedData Matrix to put results of PCA into.
+   * @param eigVal Vector to put eigenvalues into.
+   * @param eigvec Matrix to put eigenvectors (loadings) into.
+   * @param rank Rank of the decomposition (ignored by this policy).
+   */
+  void Apply(const arma::mat& data,
+             const arma::mat& centeredData,
+             arma::mat& transformedData,
+             arma::vec& eigVal,
+             arma::mat& eigvec,
+             const size_t /* rank */)
+  {
+    // This matrix would store the right singular vectors; we do not need them.
+    arma::mat v;
+
+    // Do singular value decomposition.  Use the economical singular value
+    // decomposition if the data has more columns than rows.
+    if (data.n_rows < data.n_cols)
+    {
+      // Do economical singular value decomposition and compute only the left
+      // singular vectors.
+      arma::svd_econ(eigvec, eigVal, v, centeredData, 'l');
+    }
+    else
+    {
+      arma::svd(eigvec, eigVal, v, centeredData);
+    }
+
+    // Now we must square the singular values to get the eigenvalues.
+    // In addition we must divide by the number of points minus one, because
+    // the covariance matrix is X * X' / (N - 1).
+    eigVal %= eigVal / (data.n_cols - 1);
+
+    // Project the samples onto the principal components.
+    transformedData = arma::trans(eigvec) * centeredData;
+  }
+};
+
+} // namespace pca
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp
new file mode 100644
index 0000000..c866a8f
--- /dev/null
+++ b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp
@@ -0,0 +1,92 @@
+/**
+ * @file quic_svd_method.hpp
+ * @author Marcus Edel
+ *
+ * Implementation of the QUIC-SVD policy for use in the Principal Components
+ * Analysis method.
+ */
+
+#ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_QUIC_SVD_METHOD_HPP
+#define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_QUIC_SVD_METHOD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/quic_svd/quic_svd.hpp>
+
+namespace mlpack {
+namespace pca {
+
+/**
+ * Implementation of the QUIC-SVD policy.
+ */
+class QUICSVDPolicy
+{
+  public:
+
+  /**
+   * Use QUIC-SVD method to perform the principal components analysis (PCA).
+   *
+   * @param epsilon Error tolerance fraction for calculated subspace.
+   * @param delta Cumulative probability for Monte Carlo error lower bound.
+   */
+  QUICSVDPolicy(const double epsilon = 0.03, const double delta = 0.1) :
+       epsilon(epsilon),
+       delta(delta)
+  {
+    /* Nothing to do here */
+  }
+
+  /**
+   * Apply Principal Component Analysis to the provided data set using the
+   * QUIC-SVD method.
+   *
+   * @param data Data matrix.
+   * @param centeredData Centered data matrix.
+   * @param transformedData Matrix to put results of PCA into.
+   * @param eigVal Vector to put eigenvalues into.
+   * @param eigvec Matrix to put eigenvectors (loadings) into.
+   * @param rank Rank of the decomposition (ignored by this policy).
+   */
+  void Apply(const arma::mat& data,
+             const arma::mat& centeredData,
+             arma::mat& transformedData,
+             arma::vec& eigVal,
+             arma::mat& eigvec,
+             const size_t /* rank */)
+  {
+    // v: right singular vectors (not needed); sigma: the singular values.
+    arma::mat v, sigma;
+
+    // Do singular value decomposition using the QUIC-SVD algorithm.
+    svd::QUIC_SVD quicsvd(centeredData, eigvec, v, sigma, epsilon, delta);
+
+    // Now we must square the singular values to get the eigenvalues.
+    // In addition we must divide by the number of points minus one, because
+    // the covariance matrix is X * X' / (N - 1).
+    eigVal = arma::pow(arma::diagvec(sigma), 2) / (data.n_cols - 1);
+
+    // Project the samples onto the principal components.
+    transformedData = arma::trans(eigvec) * centeredData;
+  }
+
+  //! Get the error tolerance fraction for calculated subspace.
+  double Epsilon() const { return epsilon; }
+  //! Modify the error tolerance fraction for calculated subspace.
+  double& Epsilon() { return epsilon; }
+
+  //! Get the cumulative probability for Monte Carlo error lower bound.
+  double Delta() const { return delta; }
+  //! Modify the cumulative probability for Monte Carlo error lower bound.
+  double& Delta() { return delta; }
+
+  private:
+    //! Error tolerance fraction for calculated subspace.
+    double epsilon;
+
+    //! Cumulative probability for Monte Carlo error lower bound.
+    double delta;
+};
+
+} // namespace pca
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp
new file mode 100644
index 0000000..767eb9b
--- /dev/null
+++ b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp
@@ -0,0 +1,97 @@
+/**
+ * @file randomized_svd_method.hpp
+ * @author Marcus Edel
+ *
+ * Implementation of the randomized SVD method for use in the Principal
+ * Components Analysis method.
+ */
+
+#ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
+#define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/randomized_svd/randomized_svd.hpp>
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+
+namespace mlpack {
+namespace pca {
+
+/**
+ * Implementation of the randomized SVD policy.
+ */
+class RandomizedSVDPolicy
+{
+  public:
+  /**
+   * Use randomized SVD method to perform the principal components analysis
+   * (PCA).
+   *
+   * @param iteratedPower Size of the normalized power iterations
+   *        (default 0; presumably replaced by rank + 2 -- TODO confirm).
+   * @param maxIterations Number of iterations for the power method
+   *        (Default: 2).
+   */
+  RandomizedSVDPolicy(const size_t iteratedPower = 0,
+                      const size_t maxIterations = 2) :
+      iteratedPower(iteratedPower),
+      maxIterations(maxIterations)
+  {
+    /* Nothing to do here */
+  }
+
+  /**
+   * Apply Principal Component Analysis to the provided data set using the
+   * randomized SVD.
+   *
+   * @param data Data matrix.
+   * @param centeredData Centered data matrix.
+   * @param transformedData Matrix to put results of PCA into.
+   * @param eigVal Vector to put eigenvalues into.
+   * @param eigvec Matrix to put eigenvectors (loadings) into.
+   * @param rank Rank of the decomposition.
+   */
+  void Apply(const arma::mat& data,
+             const arma::mat& centeredData,
+             arma::mat& transformedData,
+             arma::vec& eigVal,
+             arma::mat& eigvec,
+             const size_t rank)
+  {
+    // This matrix would store the right singular vectors; we do not need them.
+    arma::mat v;
+
+    // Randomized SVD.  NOTE(review): run on data, not centeredData -- confirm.
+    svd::RandomizedSVD rsvd(iteratedPower, maxIterations);
+    rsvd.Apply(data, eigvec, eigVal, v, rank);
+
+    // Now we must square the singular values to get the eigenvalues.
+    // In addition we must divide by the number of points minus one, because
+    // the covariance matrix is X * X' / (N - 1).
+    eigVal %= eigVal / (data.n_cols - 1);
+
+    // Project the samples onto the principal components.
+    transformedData = arma::trans(eigvec) * centeredData;
+  }
+
+  //! Get the size of the normalized power iterations.
+  size_t IteratedPower() const { return iteratedPower; }
+  //! Modify the size of the normalized power iterations.
+  size_t& IteratedPower() { return iteratedPower; }
+
+  //! Get the number of iterations for the power method.
+  size_t MaxIterations() const { return maxIterations; }
+  //! Modify the number of iterations for the power method.
+  size_t& MaxIterations() { return maxIterations; }
+
+  private:
+    //! Locally stored size of the normalized power iterations.
+    size_t iteratedPower;
+
+    //! Locally stored number of iterations for the power method.
+    size_t maxIterations;
+};
+
+} // namespace pca
+} // namespace mlpack
+
+#endif




More information about the mlpack-git mailing list