[mlpack-git] master: Add exact, randomized and QUIC SVD decomposition policies; meant to be used in conjunction with the PCA class. (057b1c3)
gitdub at mlpack.org
gitdub at mlpack.org
Wed Jul 6 15:30:09 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/98babfc774bce91170df994763b670b9abd20917...e7b9b042d1d6e2d9895d5fa141e9c135b2d2ea57
>---------------------------------------------------------------
commit 057b1c35206cf68ace238b26d920f5cbe32af926
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Wed Jul 6 01:04:31 2016 +0200
Add exact, randomized and QUIC SVD decomposition policies; meant to be used in conjunction with the PCA class.
>---------------------------------------------------------------
057b1c35206cf68ace238b26d920f5cbe32af926
.../decomposition_policies}/CMakeLists.txt | 5 +-
.../decomposition_policies/exact_svd_method.hpp | 72 ++++++++++++++++
.../pca/decomposition_policies/quic_svd_method.hpp | 92 ++++++++++++++++++++
.../randomized_svd_method.hpp | 97 ++++++++++++++++++++++
4 files changed, 264 insertions(+), 2 deletions(-)
diff --git a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt b/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt
similarity index 85%
copy from src/mlpack/methods/ann/pooling_rules/CMakeLists.txt
copy to src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt
index 99b6b80..968c7cc 100644
--- a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt
+++ b/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt
@@ -1,8 +1,9 @@
# Define the files we need to compile
# Anything not in this list will not be compiled into mlpack.
set(SOURCES
- max_pooling.hpp
- mean_pooling.hpp
+ exact_svd_method.hpp
+ randomized_svd_method.hpp
+ quic_svd_method.hpp
)
# Add directory name to sources.
diff --git a/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp
new file mode 100644
index 0000000..7b24b15
--- /dev/null
+++ b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp
@@ -0,0 +1,72 @@
+/**
+ * @file exact_svd_method.hpp
+ * @author Ajinkya Kale
+ * @author Ryan Curtin
+ * @author Marcus Edel
+ *
+ * Implementation of the exact svd method for use in the Principal Components
+ * Analysis method.
+ */
+
+#ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_EXACT_SVD_METHOD_HPP
+#define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_EXACT_SVD_METHOD_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace pca {
+
+/**
+ * Implementation of the exact SVD policy for use with the PCA class.
+ */
+class ExactSVDPolicy
+{
+ public:
+ /**
+ * Apply Principal Component Analysis to the provided data set using the
+ * exact SVD method.
+ *
+ * @param data Data matrix (only its dimensions are read here).
+ * @param centeredData Centered data matrix; this is what gets decomposed.
+ * @param transformedData Matrix to put results of PCA into.
+ * @param eigVal Vector to put eigenvalues into.
+ * @param eigvec Matrix to put eigenvectors (loadings) into.
+ * @param rank Rank of the decomposition (ignored by this policy).
+ */
+ void Apply(const arma::mat& data,
+ const arma::mat& centeredData,
+ arma::mat& transformedData,
+ arma::vec& eigVal,
+ arma::mat& eigvec,
+ const size_t /* rank */)
+ {
+ // This matrix will store the right singular vectors; we do not need them.
+ arma::mat v;
+
+ // Do singular value decomposition. Use the economical singular value
+ // decomposition if there are more columns than rows.
+ if (data.n_rows < data.n_cols)
+ {
+ // Do economical singular value decomposition and compute only the left
+ // singular vectors.
+ arma::svd_econ(eigvec, eigVal, v, centeredData, 'l');
+ }
+ else
+ {
+ arma::svd(eigvec, eigVal, v, centeredData);
+ }
+
+ // Now we must square the singular values to get the eigenvalues.
+ // In addition we must divide by the number of points minus one, because
+ // the covariance matrix is X * X' / (N - 1).
+ eigVal %= eigVal / (data.n_cols - 1);
+
+ // Project the samples onto the principal components.
+ transformedData = arma::trans(eigvec) * centeredData;
+ }
+};
+
+} // namespace pca
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp
new file mode 100644
index 0000000..c866a8f
--- /dev/null
+++ b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp
@@ -0,0 +1,92 @@
+/**
+ * @file quic_svd_method.hpp
+ * @author Marcus Edel
+ *
+ * Implementation of the QUIC-SVD policy for use in the Principal Components
+ * Analysis method.
+ */
+
+#ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_QUIC_SVD_METHOD_HPP
+#define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_QUIC_SVD_METHOD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/quic_svd/quic_svd.hpp>
+
+namespace mlpack {
+namespace pca {
+
+/**
+ * Implementation of the QUIC-SVD policy for use with the PCA class.
+ */
+class QUICSVDPolicy
+{
+ public:
+
+ /**
+ * Create the QUIC-SVD policy; the parameters are stored for use in Apply().
+ *
+ * @param epsilon Error tolerance fraction for calculated subspace.
+ * @param delta Cumulative probability for Monte Carlo error lower bound.
+ */
+ QUICSVDPolicy(const double epsilon = 0.03, const double delta = 0.1) :
+ epsilon(epsilon),
+ delta(delta)
+ {
+ /* Nothing to do here */
+ }
+
+ /**
+ * Apply Principal Component Analysis to the provided data set using the
+ * QUIC-SVD method.
+ *
+ * @param data Data matrix (only its number of columns is read here).
+ * @param centeredData Centered data matrix; this is what gets decomposed.
+ * @param transformedData Matrix to put results of PCA into.
+ * @param eigVal Vector to put eigenvalues into.
+ * @param eigvec Matrix to put eigenvectors (loadings) into.
+ * @param rank Rank of the decomposition (ignored by this policy).
+ */
+ void Apply(const arma::mat& data,
+ const arma::mat& centeredData,
+ arma::mat& transformedData,
+ arma::vec& eigVal,
+ arma::mat& eigvec,
+ const size_t /* rank */)
+ {
+ // v: right singular vectors (unused); sigma: matrix of singular values.
+ arma::mat v, sigma;
+
+ // Do singular value decomposition using the QUIC-SVD algorithm.
+ svd::QUIC_SVD quicsvd(centeredData, eigvec, v, sigma, epsilon, delta);
+
+ // Now we must square the singular values to get the eigenvalues.
+ // In addition we must divide by the number of points minus one, because
+ // the covariance matrix is X * X' / (N - 1).
+ eigVal = arma::pow(arma::diagvec(sigma), 2) / (data.n_cols - 1);
+
+ // Project the samples onto the principal components.
+ transformedData = arma::trans(eigvec) * centeredData;
+ }
+
+ //! Get the error tolerance fraction for calculated subspace.
+ double Epsilon() const { return epsilon; }
+ //! Modify the error tolerance fraction for calculated subspace.
+ double& Epsilon() { return epsilon; }
+
+ //! Get the cumulative probability for Monte Carlo error lower bound.
+ double Delta() const { return delta; }
+ //! Modify the cumulative probability for Monte Carlo error lower bound.
+ double& Delta() { return delta; }
+
+ private:
+ //! Error tolerance fraction for calculated subspace.
+ double epsilon;
+
+ //! Cumulative probability for Monte Carlo error lower bound.
+ double delta;
+};
+
+} // namespace pca
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp
new file mode 100644
index 0000000..767eb9b
--- /dev/null
+++ b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp
@@ -0,0 +1,97 @@
+/**
+ * @file randomized_svd_method.hpp
+ * @author Marcus Edel
+ *
+ * Implementation of the randomized svd method for use in the Principal
+ * Components Analysis method.
+ */
+
+#ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
+#define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/randomized_svd/randomized_svd.hpp>
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+
+namespace mlpack {
+namespace pca {
+
+/**
+ * Implementation of the randomized SVD policy for use with the PCA class.
+ */
+class RandomizedSVDPolicy
+{
+ public:
+ /**
+ * Create the randomized SVD policy; the parameters are stored for use in
+ * Apply().
+ *
+ * @param iteratedPower Size of the normalized power iterations
+ * (argument default is 0; presumably mapped to rank + 2 -- confirm).
+ * @param maxIterations Number of iterations for the power method
+ * (Default: 2).
+ */
+ RandomizedSVDPolicy(const size_t iteratedPower = 0,
+ const size_t maxIterations = 2) :
+ iteratedPower(iteratedPower),
+ maxIterations(maxIterations)
+ {
+ /* Nothing to do here */
+ }
+
+ /**
+ * Apply Principal Component Analysis to the provided data set using the
+ * randomized SVD.
+ *
+ * @param data Data matrix; passed to the randomized SVD as its input.
+ * @param centeredData Centered data matrix; projected onto the eigenvectors.
+ * @param transformedData Matrix to put results of PCA into.
+ * @param eigVal Vector to put eigenvalues into.
+ * @param eigvec Matrix to put eigenvectors (loadings) into.
+ * @param rank Rank of the decomposition; forwarded to the randomized SVD.
+ */
+ void Apply(const arma::mat& data,
+ const arma::mat& centeredData,
+ arma::mat& transformedData,
+ arma::vec& eigVal,
+ arma::mat& eigvec,
+ const size_t rank)
+ {
+ // This matrix will store the right singular vectors; we do not need them.
+ arma::mat v;
+
+ // Randomized SVD. NOTE(review): uses data, not centeredData -- intended?
+ svd::RandomizedSVD rsvd(iteratedPower, maxIterations);
+ rsvd.Apply(data, eigvec, eigVal, v, rank);
+
+ // Now we must square the singular values to get the eigenvalues.
+ // In addition we must divide by the number of points minus one, because
+ // the covariance matrix is X * X' / (N - 1).
+ eigVal %= eigVal / (data.n_cols - 1);
+
+ // Project the samples onto the principal components.
+ transformedData = arma::trans(eigvec) * centeredData;
+ }
+
+ //! Get the size of the normalized power iterations.
+ size_t IteratedPower() const { return iteratedPower; }
+ //! Modify the size of the normalized power iterations.
+ size_t& IteratedPower() { return iteratedPower; }
+
+ //! Get the number of iterations for the power method.
+ size_t MaxIterations() const { return maxIterations; }
+ //! Modify the number of iterations for the power method.
+ size_t& MaxIterations() { return maxIterations; }
+
+ private:
+ //! Locally stored size of the normalized power iterations.
+ size_t iteratedPower;
+
+ //! Locally stored number of iterations for the power method.
+ size_t maxIterations;
+};
+
+} // namespace pca
+} // namespace mlpack
+
+#endif
More information about the mlpack-git
mailing list